sqlglot.parser
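A minimal usage sketch (illustrative, not part of the module source below): the Parser consumes tokens produced by the Tokenizer and yields expression trees, e.g.

    from sqlglot import Parser, Tokenizer

    tokens = Tokenizer().tokenize("SELECT a FROM b")
    ast = Parser().parse(tokens)[0]
    print(ast.sql())  # SELECT a FROM b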
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
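# Illustrative sketch (not part of the module): these builders are the values that
# FUNCTIONS maps function names to. For example, with the default dialect (where
# LOG_BASE_FIRST is True and LOG_DEFAULTS_TO_LN is False), build_logarithm behaves
# roughly like:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("LOG(2, 8)")  # two args -> exp.Log(this=2, expression=8)
#     >>> sqlglot.parse_one("LOG(8)")     # one arg  -> exp.Log (exp.Ln if LOG_DEFAULTS_TO_LN)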
class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
        TokenType.STORAGE_INTEGRATION,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
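    # Illustrative sketch (not part of the module): ID_VAR_TOKENS is what lets many
    # keywords double as identifiers, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT 1 AS first")  # FIRST is in ID_VAR_TOKENS
    #
    # INTERVAL_VARS removes END, presumably so an interval unit never consumes a
    # closing END keyword.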
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }
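    # Illustrative sketch (not part of the module): LAMBDAS handles higher-order
    # function arguments, e.g. in a dialect with arrow lambdas something like
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT TRANSFORM(xs, x -> x + 1)", read="databricks")
    #
    # parses "x -> x + 1" into exp.Lambda(this=x + 1, expressions=[x]).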
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
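    # Illustrative sketch (not part of the module): COLUMN_OPERATORS drives the
    # postfix operators that chain off a column, e.g. in the Postgres dialect
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT x::INT, j -> 'a', j ->> 'b'", read="postgres")
    #
    # produces exp.Cast, exp.JSONExtract and exp.JSONExtractScalar nodes.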
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
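    # Illustrative sketch (not part of the module): RANGE_PARSERS covers infix
    # predicates, so e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT * FROM t WHERE x LIKE 'a%' ESCAPE '!'")
    #
    # goes through binary_range_parser(exp.Like), whose _parse_escape wrapper
    # yields exp.Escape(this=exp.Like(...), expression='!').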
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }
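    # Illustrative sketch (not part of the module): CONSTRAINT_PARSERS is keyed by
    # the raw keyword(s) seen in a column definition, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CREATE TABLE t (id INT PRIMARY KEY, x TEXT DEFAULT 'y')")
    #
    # dispatches to the "PRIMARY KEY" and "DEFAULT" entries above.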
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
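    # Illustrative sketch (not part of the module): FUNCTION_PARSERS handles
    # functions whose arguments don't form a plain comma-separated list, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT EXTRACT(YEAR FROM d), CAST(x AS INT)")
    #
    # where "YEAR FROM d" and "x AS INT" need dedicated parsing routines.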
self: ("prewhere", self._parse_prewhere()), 915 TokenType.WHERE: lambda self: ("where", self._parse_where()), 916 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 917 TokenType.HAVING: lambda self: ("having", self._parse_having()), 918 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 919 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 920 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 921 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 922 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 923 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 924 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 925 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 926 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 927 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 928 TokenType.CLUSTER_BY: lambda self: ( 929 "cluster", 930 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 931 ), 932 TokenType.DISTRIBUTE_BY: lambda self: ( 933 "distribute", 934 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 935 ), 936 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 937 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 938 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 939 } 940 941 SET_PARSERS = { 942 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 943 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 944 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 945 "TRANSACTION": lambda self: self._parse_set_transaction(), 946 } 947 948 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 949 950 TYPE_LITERAL_PARSERS = { 951 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 952 } 953 954 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 955 956 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 957 958 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 959 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 960 "ISOLATION": ( 961 ("LEVEL", "REPEATABLE", "READ"), 962 ("LEVEL", "READ", "COMMITTED"), 963 ("LEVEL", "READ", "UNCOMITTED"), 964 ("LEVEL", "SERIALIZABLE"), 965 ), 966 "READ": ("WRITE", "ONLY"), 967 } 968 969 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 970 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 971 ) 972 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 973 974 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 975 976 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 977 978 CLONE_KEYWORDS = {"CLONE", "COPY"} 979 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 980 981 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 982 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 983 984 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 985 986 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 987 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 988 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 989 990 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 991 992 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 993 994 ADD_CONSTRAINT_TOKENS = 
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
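    # Illustrative sketch (not part of the module): parse() below is the generic
    # entry point; parse_into() targets one expression type via EXPRESSION_PARSERS
    # and is what powers calls like
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("a.b.c", into=exp.Table)
    #
    # which tries _parse_table_parts instead of the statement parser.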
1098 """ 1099 return self._parse( 1100 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1101 ) 1102 1103 def parse_into( 1104 self, 1105 expression_types: exp.IntoType, 1106 raw_tokens: t.List[Token], 1107 sql: t.Optional[str] = None, 1108 ) -> t.List[t.Optional[exp.Expression]]: 1109 """ 1110 Parses a list of tokens into a given Expression type. If a collection of Expression 1111 types is given instead, this method will try to parse the token list into each one 1112 of them, stopping at the first for which the parsing succeeds. 1113 1114 Args: 1115 expression_types: The expression type(s) to try and parse the token list into. 1116 raw_tokens: The list of tokens. 1117 sql: The original SQL string, used to produce helpful debug messages. 1118 1119 Returns: 1120 The target Expression. 1121 """ 1122 errors = [] 1123 for expression_type in ensure_list(expression_types): 1124 parser = self.EXPRESSION_PARSERS.get(expression_type) 1125 if not parser: 1126 raise TypeError(f"No parser registered for {expression_type}") 1127 1128 try: 1129 return self._parse(parser, raw_tokens, sql) 1130 except ParseError as e: 1131 e.errors[0]["into_expression"] = expression_type 1132 errors.append(e) 1133 1134 raise ParseError( 1135 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1136 errors=merge_errors(errors), 1137 ) from errors[-1] 1138 1139 def _parse( 1140 self, 1141 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1142 raw_tokens: t.List[Token], 1143 sql: t.Optional[str] = None, 1144 ) -> t.List[t.Optional[exp.Expression]]: 1145 self.reset() 1146 self.sql = sql or "" 1147 1148 total = len(raw_tokens) 1149 chunks: t.List[t.List[Token]] = [[]] 1150 1151 for i, token in enumerate(raw_tokens): 1152 if token.token_type == TokenType.SEMICOLON: 1153 if i < total - 1: 1154 chunks.append([]) 1155 else: 1156 chunks[-1].append(token) 1157 1158 expressions = [] 1159 1160 for tokens in chunks: 1161 self._index = -1 1162 self._tokens = tokens 1163 self._advance() 1164 1165 expressions.append(parse_method(self)) 1166 1167 if self._index < len(self._tokens): 1168 self.raise_error("Invalid expression / Unexpected token") 1169 1170 self.check_errors() 1171 1172 return expressions 1173 1174 def check_errors(self) -> None: 1175 """Logs or raises any found errors, depending on the chosen error level setting.""" 1176 if self.error_level == ErrorLevel.WARN: 1177 for error in self.errors: 1178 logger.error(str(error)) 1179 elif self.error_level == ErrorLevel.RAISE and self.errors: 1180 raise ParseError( 1181 concat_messages(self.errors, self.max_errors), 1182 errors=merge_errors(self.errors), 1183 ) 1184 1185 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1186 """ 1187 Appends an error in the list of recorded errors or raises it, depending on the chosen 1188 error level setting. 1189 """ 1190 token = token or self._curr or self._prev or Token.string("") 1191 start = token.start 1192 end = token.end + 1 1193 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1194 highlight = self.sql[start:end] 1195 end_context = self.sql[end : end + self.error_message_context] 1196 1197 error = ParseError.new( 1198 f"{message}. 
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )
1285 ) 1286 1287 def _parse_command(self) -> exp.Command: 1288 self._warn_unsupported() 1289 return self.expression( 1290 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1291 ) 1292 1293 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1294 start = self._prev 1295 exists = self._parse_exists() if allow_exists else None 1296 1297 self._match(TokenType.ON) 1298 1299 kind = self._match_set(self.CREATABLES) and self._prev 1300 if not kind: 1301 return self._parse_as_command(start) 1302 1303 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1304 this = self._parse_user_defined_function(kind=kind.token_type) 1305 elif kind.token_type == TokenType.TABLE: 1306 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1307 elif kind.token_type == TokenType.COLUMN: 1308 this = self._parse_column() 1309 else: 1310 this = self._parse_id_var() 1311 1312 self._match(TokenType.IS) 1313 1314 return self.expression( 1315 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1316 ) 1317 1318 def _parse_to_table( 1319 self, 1320 ) -> exp.ToTableProperty: 1321 table = self._parse_table_parts(schema=True) 1322 return self.expression(exp.ToTableProperty, this=table) 1323 1324 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1325 def _parse_ttl(self) -> exp.Expression: 1326 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1327 this = self._parse_bitwise() 1328 1329 if self._match_text_seq("DELETE"): 1330 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1331 if self._match_text_seq("RECOMPRESS"): 1332 return self.expression( 1333 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1334 ) 1335 if self._match_text_seq("TO", "DISK"): 1336 return self.expression( 1337 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1338 ) 1339 if self._match_text_seq("TO", "VOLUME"): 1340 return self.expression( 1341 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1342 ) 1343 1344 return this 1345 1346 expressions = self._parse_csv(_parse_ttl_action) 1347 where = self._parse_where() 1348 group = self._parse_group() 1349 1350 aggregates = None 1351 if group and self._match(TokenType.SET): 1352 aggregates = self._parse_csv(self._parse_set_item) 1353 1354 return self.expression( 1355 exp.MergeTreeTTL, 1356 expressions=expressions, 1357 where=where, 1358 group=group, 1359 aggregates=aggregates, 1360 ) 1361 1362 def _parse_statement(self) -> t.Optional[exp.Expression]: 1363 if self._curr is None: 1364 return None 1365 1366 if self._match_set(self.STATEMENT_PARSERS): 1367 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1368 1369 if self._match_set(Tokenizer.COMMANDS): 1370 return self._parse_command() 1371 1372 expression = self._parse_expression() 1373 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1374 return self._parse_query_modifiers(expression) 1375 1376 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1377 start = self._prev 1378 temporary = self._match(TokenType.TEMPORARY) 1379 materialized = self._match_text_seq("MATERIALIZED") 1380 1381 kind = self._match_set(self.CREATABLES) and self._prev.text 1382 if not kind: 1383 return self._parse_as_command(start) 1384 1385 return self.expression( 1386 exp.Drop, 1387 comments=start.comments, 1388 exists=exists or self._parse_exists(), 1389 this=self._parse_table( 1390 
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            ),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
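    # Illustrative sketch (not part of the module): _parse_create below handles
    # the large CREATE surface, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1")
    #
    # yields exp.Create(kind='VIEW', replace=True, ...); anything it can't fully
    # consume is replayed as a raw exp.Command via _parse_as_command.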
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )
    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")
self._match(TokenType.EQ) 1705 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1706 1707 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1708 return self.expression(exp.LogProperty, no=no) 1709 1710 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1711 return self.expression(exp.JournalProperty, **kwargs) 1712 1713 def _parse_checksum(self) -> exp.ChecksumProperty: 1714 self._match(TokenType.EQ) 1715 1716 on = None 1717 if self._match(TokenType.ON): 1718 on = True 1719 elif self._match_text_seq("OFF"): 1720 on = False 1721 1722 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1723 1724 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1725 return self.expression( 1726 exp.Cluster, 1727 expressions=( 1728 self._parse_wrapped_csv(self._parse_ordered) 1729 if wrapped 1730 else self._parse_csv(self._parse_ordered) 1731 ), 1732 ) 1733 1734 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1735 self._match_text_seq("BY") 1736 1737 self._match_l_paren() 1738 expressions = self._parse_csv(self._parse_column) 1739 self._match_r_paren() 1740 1741 if self._match_text_seq("SORTED", "BY"): 1742 self._match_l_paren() 1743 sorted_by = self._parse_csv(self._parse_ordered) 1744 self._match_r_paren() 1745 else: 1746 sorted_by = None 1747 1748 self._match(TokenType.INTO) 1749 buckets = self._parse_number() 1750 self._match_text_seq("BUCKETS") 1751 1752 return self.expression( 1753 exp.ClusteredByProperty, 1754 expressions=expressions, 1755 sorted_by=sorted_by, 1756 buckets=buckets, 1757 ) 1758 1759 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1760 if not self._match_text_seq("GRANTS"): 1761 self._retreat(self._index - 1) 1762 return None 1763 1764 return self.expression(exp.CopyGrantsProperty) 1765 1766 def _parse_freespace(self) -> exp.FreespaceProperty: 1767 self._match(TokenType.EQ) 1768 return self.expression( 1769 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1770 ) 1771 1772 def _parse_mergeblockratio( 1773 self, no: bool = False, default: bool = False 1774 ) -> exp.MergeBlockRatioProperty: 1775 if self._match(TokenType.EQ): 1776 return self.expression( 1777 exp.MergeBlockRatioProperty, 1778 this=self._parse_number(), 1779 percent=self._match(TokenType.PERCENT), 1780 ) 1781 1782 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1783 1784 def _parse_datablocksize( 1785 self, 1786 default: t.Optional[bool] = None, 1787 minimum: t.Optional[bool] = None, 1788 maximum: t.Optional[bool] = None, 1789 ) -> exp.DataBlocksizeProperty: 1790 self._match(TokenType.EQ) 1791 size = self._parse_number() 1792 1793 units = None 1794 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1795 units = self._prev.text 1796 1797 return self.expression( 1798 exp.DataBlocksizeProperty, 1799 size=size, 1800 units=units, 1801 default=default, 1802 minimum=minimum, 1803 maximum=maximum, 1804 ) 1805 1806 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1807 self._match(TokenType.EQ) 1808 always = self._match_text_seq("ALWAYS") 1809 manual = self._match_text_seq("MANUAL") 1810 never = self._match_text_seq("NEVER") 1811 default = self._match_text_seq("DEFAULT") 1812 1813 autotemp = None 1814 if self._match_text_seq("AUTOTEMP"): 1815 autotemp = self._parse_schema() 1816 1817 return self.expression( 1818 exp.BlockCompressionProperty, 1819 always=always, 1820 manual=manual, 1821 never=never, 1822 
default=default, 1823 autotemp=autotemp, 1824 ) 1825 1826 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1827 no = self._match_text_seq("NO") 1828 concurrent = self._match_text_seq("CONCURRENT") 1829 self._match_text_seq("ISOLATED", "LOADING") 1830 for_all = self._match_text_seq("FOR", "ALL") 1831 for_insert = self._match_text_seq("FOR", "INSERT") 1832 for_none = self._match_text_seq("FOR", "NONE") 1833 return self.expression( 1834 exp.IsolatedLoadingProperty, 1835 no=no, 1836 concurrent=concurrent, 1837 for_all=for_all, 1838 for_insert=for_insert, 1839 for_none=for_none, 1840 ) 1841 1842 def _parse_locking(self) -> exp.LockingProperty: 1843 if self._match(TokenType.TABLE): 1844 kind = "TABLE" 1845 elif self._match(TokenType.VIEW): 1846 kind = "VIEW" 1847 elif self._match(TokenType.ROW): 1848 kind = "ROW" 1849 elif self._match_text_seq("DATABASE"): 1850 kind = "DATABASE" 1851 else: 1852 kind = None 1853 1854 if kind in ("DATABASE", "TABLE", "VIEW"): 1855 this = self._parse_table_parts() 1856 else: 1857 this = None 1858 1859 if self._match(TokenType.FOR): 1860 for_or_in = "FOR" 1861 elif self._match(TokenType.IN): 1862 for_or_in = "IN" 1863 else: 1864 for_or_in = None 1865 1866 if self._match_text_seq("ACCESS"): 1867 lock_type = "ACCESS" 1868 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1869 lock_type = "EXCLUSIVE" 1870 elif self._match_text_seq("SHARE"): 1871 lock_type = "SHARE" 1872 elif self._match_text_seq("READ"): 1873 lock_type = "READ" 1874 elif self._match_text_seq("WRITE"): 1875 lock_type = "WRITE" 1876 elif self._match_text_seq("CHECKSUM"): 1877 lock_type = "CHECKSUM" 1878 else: 1879 lock_type = None 1880 1881 override = self._match_text_seq("OVERRIDE") 1882 1883 return self.expression( 1884 exp.LockingProperty, 1885 this=this, 1886 kind=kind, 1887 for_or_in=for_or_in, 1888 lock_type=lock_type, 1889 override=override, 1890 ) 1891 1892 def _parse_partition_by(self) -> t.List[exp.Expression]: 1893 if self._match(TokenType.PARTITION_BY): 1894 return self._parse_csv(self._parse_conjunction) 1895 return [] 1896 1897 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1898 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1899 if self._match_text_seq("MINVALUE"): 1900 return exp.var("MINVALUE") 1901 if self._match_text_seq("MAXVALUE"): 1902 return exp.var("MAXVALUE") 1903 return self._parse_bitwise() 1904 1905 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1906 expression = None 1907 from_expressions = None 1908 to_expressions = None 1909 1910 if self._match(TokenType.IN): 1911 this = self._parse_wrapped_csv(self._parse_bitwise) 1912 elif self._match(TokenType.FROM): 1913 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1914 self._match_text_seq("TO") 1915 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1916 elif self._match_text_seq("WITH", "(", "MODULUS"): 1917 this = self._parse_number() 1918 self._match_text_seq(",", "REMAINDER") 1919 expression = self._parse_number() 1920 self._match_r_paren() 1921 else: 1922 self.raise_error("Failed to parse partition bound spec.") 1923 1924 return self.expression( 1925 exp.PartitionBoundSpec, 1926 this=this, 1927 expression=expression, 1928 from_expressions=from_expressions, 1929 to_expressions=to_expressions, 1930 ) 1931 1932 # https://www.postgresql.org/docs/current/sql-createtable.html 1933 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1934 if not self._match_text_seq("OF"): 1935 
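            # no OF follows, so this isn't a PARTITION OF clause: step one token back
            # (undoing the keyword match that dispatched here) so other parsers can try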
self._retreat(self._index - 1) 1936 return None 1937 1938 this = self._parse_table(schema=True) 1939 1940 if self._match(TokenType.DEFAULT): 1941 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1942 elif self._match_text_seq("FOR", "VALUES"): 1943 expression = self._parse_partition_bound_spec() 1944 else: 1945 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1946 1947 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1948 1949 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1950 self._match(TokenType.EQ) 1951 return self.expression( 1952 exp.PartitionedByProperty, 1953 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1954 ) 1955 1956 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1957 if self._match_text_seq("AND", "STATISTICS"): 1958 statistics = True 1959 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1960 statistics = False 1961 else: 1962 statistics = None 1963 1964 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1965 1966 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1967 if self._match_text_seq("SQL"): 1968 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 1969 return None 1970 1971 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1972 if self._match_text_seq("SQL", "DATA"): 1973 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 1974 return None 1975 1976 def _parse_no_property(self) -> t.Optional[exp.Expression]: 1977 if self._match_text_seq("PRIMARY", "INDEX"): 1978 return exp.NoPrimaryIndexProperty() 1979 if self._match_text_seq("SQL"): 1980 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 1981 return None 1982 1983 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1984 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1985 return exp.OnCommitProperty() 1986 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1987 return exp.OnCommitProperty(delete=True) 1988 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1989 1990 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1991 if self._match_text_seq("SQL", "DATA"): 1992 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 1993 return None 1994 1995 def _parse_distkey(self) -> exp.DistKeyProperty: 1996 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1997 1998 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1999 table = self._parse_table(schema=True) 2000 2001 options = [] 2002 while self._match_texts(("INCLUDING", "EXCLUDING")): 2003 this = self._prev.text.upper() 2004 2005 id_var = self._parse_id_var() 2006 if not id_var: 2007 return None 2008 2009 options.append( 2010 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2011 ) 2012 2013 return self.expression(exp.LikeProperty, this=table, expressions=options) 2014 2015 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2016 return self.expression( 2017 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2018 ) 2019 2020 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2021 self._match(TokenType.EQ) 2022 return self.expression( 2023 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2024 ) 2025 2026 def _parse_remote_with_connection(self) 
-> exp.RemoteWithConnectionModelProperty: 2027 self._match_text_seq("WITH", "CONNECTION") 2028 return self.expression( 2029 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2030 ) 2031 2032 def _parse_returns(self) -> exp.ReturnsProperty: 2033 value: t.Optional[exp.Expression] 2034 is_table = self._match(TokenType.TABLE) 2035 2036 if is_table: 2037 if self._match(TokenType.LT): 2038 value = self.expression( 2039 exp.Schema, 2040 this="TABLE", 2041 expressions=self._parse_csv(self._parse_struct_types), 2042 ) 2043 if not self._match(TokenType.GT): 2044 self.raise_error("Expecting >") 2045 else: 2046 value = self._parse_schema(exp.var("TABLE")) 2047 else: 2048 value = self._parse_types() 2049 2050 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2051 2052 def _parse_describe(self) -> exp.Describe: 2053 kind = self._match_set(self.CREATABLES) and self._prev.text 2054 extended = self._match_text_seq("EXTENDED") 2055 this = self._parse_table(schema=True) 2056 properties = self._parse_properties() 2057 expressions = properties.expressions if properties else None 2058 return self.expression( 2059 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2060 ) 2061 2062 def _parse_insert(self) -> exp.Insert: 2063 comments = ensure_list(self._prev_comments) 2064 hint = self._parse_hint() 2065 overwrite = self._match(TokenType.OVERWRITE) 2066 ignore = self._match(TokenType.IGNORE) 2067 local = self._match_text_seq("LOCAL") 2068 alternative = None 2069 2070 if self._match_text_seq("DIRECTORY"): 2071 this: t.Optional[exp.Expression] = self.expression( 2072 exp.Directory, 2073 this=self._parse_var_or_string(), 2074 local=local, 2075 row_format=self._parse_row_format(match_row=True), 2076 ) 2077 else: 2078 if self._match(TokenType.OR): 2079 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2080 2081 self._match(TokenType.INTO) 2082 comments += ensure_list(self._prev_comments) 2083 self._match(TokenType.TABLE) 2084 this = self._parse_table(schema=True) 2085 2086 returning = self._parse_returning() 2087 2088 return self.expression( 2089 exp.Insert, 2090 comments=comments, 2091 hint=hint, 2092 this=this, 2093 by_name=self._match_text_seq("BY", "NAME"), 2094 exists=self._parse_exists(), 2095 partition=self._parse_partition(), 2096 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2097 and self._parse_conjunction(), 2098 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2099 conflict=self._parse_on_conflict(), 2100 returning=returning or self._parse_returning(), 2101 overwrite=overwrite, 2102 alternative=alternative, 2103 ignore=ignore, 2104 ) 2105 2106 def _parse_kill(self) -> exp.Kill: 2107 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2108 2109 return self.expression( 2110 exp.Kill, 2111 this=self._parse_primary(), 2112 kind=kind, 2113 ) 2114 2115 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2116 conflict = self._match_text_seq("ON", "CONFLICT") 2117 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2118 2119 if not conflict and not duplicate: 2120 return None 2121 2122 conflict_keys = None 2123 constraint = None 2124 2125 if conflict: 2126 if self._match_text_seq("ON", "CONSTRAINT"): 2127 constraint = self._parse_id_var() 2128 elif self._match(TokenType.L_PAREN): 2129 conflict_keys = self._parse_csv(self._parse_id_var) 2130 self._match_r_paren() 2131 2132 action = 
self._parse_var_from_options(self.CONFLICT_ACTIONS) 2133 if self._prev.token_type == TokenType.UPDATE: 2134 self._match(TokenType.SET) 2135 expressions = self._parse_csv(self._parse_equality) 2136 else: 2137 expressions = None 2138 2139 return self.expression( 2140 exp.OnConflict, 2141 duplicate=duplicate, 2142 expressions=expressions, 2143 action=action, 2144 conflict_keys=conflict_keys, 2145 constraint=constraint, 2146 ) 2147 2148 def _parse_returning(self) -> t.Optional[exp.Returning]: 2149 if not self._match(TokenType.RETURNING): 2150 return None 2151 return self.expression( 2152 exp.Returning, 2153 expressions=self._parse_csv(self._parse_expression), 2154 into=self._match(TokenType.INTO) and self._parse_table_part(), 2155 ) 2156 2157 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2158 if not self._match(TokenType.FORMAT): 2159 return None 2160 return self._parse_row_format() 2161 2162 def _parse_row_format( 2163 self, match_row: bool = False 2164 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2165 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2166 return None 2167 2168 if self._match_text_seq("SERDE"): 2169 this = self._parse_string() 2170 2171 serde_properties = None 2172 if self._match(TokenType.SERDE_PROPERTIES): 2173 serde_properties = self.expression( 2174 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2175 ) 2176 2177 return self.expression( 2178 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2179 ) 2180 2181 self._match_text_seq("DELIMITED") 2182 2183 kwargs = {} 2184 2185 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2186 kwargs["fields"] = self._parse_string() 2187 if self._match_text_seq("ESCAPED", "BY"): 2188 kwargs["escaped"] = self._parse_string() 2189 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2190 kwargs["collection_items"] = self._parse_string() 2191 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2192 kwargs["map_keys"] = self._parse_string() 2193 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2194 kwargs["lines"] = self._parse_string() 2195 if self._match_text_seq("NULL", "DEFINED", "AS"): 2196 kwargs["null"] = self._parse_string() 2197 2198 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2199 2200 def _parse_load(self) -> exp.LoadData | exp.Command: 2201 if self._match_text_seq("DATA"): 2202 local = self._match_text_seq("LOCAL") 2203 self._match_text_seq("INPATH") 2204 inpath = self._parse_string() 2205 overwrite = self._match(TokenType.OVERWRITE) 2206 self._match_pair(TokenType.INTO, TokenType.TABLE) 2207 2208 return self.expression( 2209 exp.LoadData, 2210 this=self._parse_table(schema=True), 2211 local=local, 2212 overwrite=overwrite, 2213 inpath=inpath, 2214 partition=self._parse_partition(), 2215 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2216 serde=self._match_text_seq("SERDE") and self._parse_string(), 2217 ) 2218 return self._parse_as_command(self._prev) 2219 2220 def _parse_delete(self) -> exp.Delete: 2221 # This handles MySQL's "Multiple-Table Syntax" 2222 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2223 tables = None 2224 comments = self._prev_comments 2225 if not self._match(TokenType.FROM, advance=False): 2226 tables = self._parse_csv(self._parse_table) or None 2227 2228 returning = self._parse_returning() 2229 2230 return self.expression( 2231 exp.Delete, 2232 
comments=comments, 2233 tables=tables, 2234 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2235 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2236 where=self._parse_where(), 2237 returning=returning or self._parse_returning(), 2238 limit=self._parse_limit(), 2239 ) 2240 2241 def _parse_update(self) -> exp.Update: 2242 comments = self._prev_comments 2243 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2244 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2245 returning = self._parse_returning() 2246 return self.expression( 2247 exp.Update, 2248 comments=comments, 2249 **{ # type: ignore 2250 "this": this, 2251 "expressions": expressions, 2252 "from": self._parse_from(joins=True), 2253 "where": self._parse_where(), 2254 "returning": returning or self._parse_returning(), 2255 "order": self._parse_order(), 2256 "limit": self._parse_limit(), 2257 }, 2258 ) 2259 2260 def _parse_uncache(self) -> exp.Uncache: 2261 if not self._match(TokenType.TABLE): 2262 self.raise_error("Expecting TABLE after UNCACHE") 2263 2264 return self.expression( 2265 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2266 ) 2267 2268 def _parse_cache(self) -> exp.Cache: 2269 lazy = self._match_text_seq("LAZY") 2270 self._match(TokenType.TABLE) 2271 table = self._parse_table(schema=True) 2272 2273 options = [] 2274 if self._match_text_seq("OPTIONS"): 2275 self._match_l_paren() 2276 k = self._parse_string() 2277 self._match(TokenType.EQ) 2278 v = self._parse_string() 2279 options = [k, v] 2280 self._match_r_paren() 2281 2282 self._match(TokenType.ALIAS) 2283 return self.expression( 2284 exp.Cache, 2285 this=table, 2286 lazy=lazy, 2287 options=options, 2288 expression=self._parse_select(nested=True), 2289 ) 2290 2291 def _parse_partition(self) -> t.Optional[exp.Partition]: 2292 if not self._match(TokenType.PARTITION): 2293 return None 2294 2295 return self.expression( 2296 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2297 ) 2298 2299 def _parse_value(self) -> exp.Tuple: 2300 if self._match(TokenType.L_PAREN): 2301 expressions = self._parse_csv(self._parse_expression) 2302 self._match_r_paren() 2303 return self.expression(exp.Tuple, expressions=expressions) 2304 2305 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
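        # e.g. VALUES 1, 2 parses as two one-element tuples (two rows of one column),
        # while the parenthesized VALUES (1, 2) branch above yields a single
        # two-element tuple (one row of two columns)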
2306 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2307 2308 def _parse_projections(self) -> t.List[exp.Expression]: 2309 return self._parse_expressions() 2310 2311 def _parse_select( 2312 self, 2313 nested: bool = False, 2314 table: bool = False, 2315 parse_subquery_alias: bool = True, 2316 parse_set_operation: bool = True, 2317 ) -> t.Optional[exp.Expression]: 2318 cte = self._parse_with() 2319 2320 if cte: 2321 this = self._parse_statement() 2322 2323 if not this: 2324 self.raise_error("Failed to parse any statement following CTE") 2325 return cte 2326 2327 if "with" in this.arg_types: 2328 this.set("with", cte) 2329 else: 2330 self.raise_error(f"{this.key} does not support CTE") 2331 this = cte 2332 2333 return this 2334 2335 # duckdb supports leading with FROM x 2336 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2337 2338 if self._match(TokenType.SELECT): 2339 comments = self._prev_comments 2340 2341 hint = self._parse_hint() 2342 all_ = self._match(TokenType.ALL) 2343 distinct = self._match_set(self.DISTINCT_TOKENS) 2344 2345 kind = ( 2346 self._match(TokenType.ALIAS) 2347 and self._match_texts(("STRUCT", "VALUE")) 2348 and self._prev.text.upper() 2349 ) 2350 2351 if distinct: 2352 distinct = self.expression( 2353 exp.Distinct, 2354 on=self._parse_value() if self._match(TokenType.ON) else None, 2355 ) 2356 2357 if all_ and distinct: 2358 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2359 2360 limit = self._parse_limit(top=True) 2361 projections = self._parse_projections() 2362 2363 this = self.expression( 2364 exp.Select, 2365 kind=kind, 2366 hint=hint, 2367 distinct=distinct, 2368 expressions=projections, 2369 limit=limit, 2370 ) 2371 this.comments = comments 2372 2373 into = self._parse_into() 2374 if into: 2375 this.set("into", into) 2376 2377 if not from_: 2378 from_ = self._parse_from() 2379 2380 if from_: 2381 this.set("from", from_) 2382 2383 this = self._parse_query_modifiers(this) 2384 elif (table or nested) and self._match(TokenType.L_PAREN): 2385 if self._match(TokenType.PIVOT): 2386 this = self._parse_simplified_pivot() 2387 elif self._match(TokenType.FROM): 2388 this = exp.select("*").from_( 2389 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2390 ) 2391 else: 2392 this = ( 2393 self._parse_table() 2394 if table 2395 else self._parse_select(nested=True, parse_set_operation=False) 2396 ) 2397 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2398 2399 self._match_r_paren() 2400 2401 # We return early here so that the UNION isn't attached to the subquery by the 2402 # following call to _parse_set_operations, but instead becomes the parent node 2403 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2404 elif self._match(TokenType.VALUES, advance=False): 2405 this = self._parse_derived_table_values() 2406 elif from_: 2407 this = exp.select("*").from_(from_.this, copy=False) 2408 else: 2409 this = None 2410 2411 if parse_set_operation: 2412 return self._parse_set_operations(this) 2413 return this 2414 2415 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2416 if not skip_with_token and not self._match(TokenType.WITH): 2417 return None 2418 2419 comments = self._prev_comments 2420 recursive = self._match(TokenType.RECURSIVE) 2421 2422 expressions = [] 2423 while True: 2424 expressions.append(self._parse_cte()) 2425 2426 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2427 break 2428 else: 
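                # also consume a repeated WITH, so inputs like
                # WITH a AS (...), WITH b AS (...) still parse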
2429 self._match(TokenType.WITH) 2430 2431 return self.expression( 2432 exp.With, comments=comments, expressions=expressions, recursive=recursive 2433 ) 2434 2435 def _parse_cte(self) -> exp.CTE: 2436 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2437 if not alias or not alias.this: 2438 self.raise_error("Expected CTE to have alias") 2439 2440 self._match(TokenType.ALIAS) 2441 return self.expression( 2442 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2443 ) 2444 2445 def _parse_table_alias( 2446 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2447 ) -> t.Optional[exp.TableAlias]: 2448 any_token = self._match(TokenType.ALIAS) 2449 alias = ( 2450 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2451 or self._parse_string_as_identifier() 2452 ) 2453 2454 index = self._index 2455 if self._match(TokenType.L_PAREN): 2456 columns = self._parse_csv(self._parse_function_parameter) 2457 self._match_r_paren() if columns else self._retreat(index) 2458 else: 2459 columns = None 2460 2461 if not alias and not columns: 2462 return None 2463 2464 return self.expression(exp.TableAlias, this=alias, columns=columns) 2465 2466 def _parse_subquery( 2467 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2468 ) -> t.Optional[exp.Subquery]: 2469 if not this: 2470 return None 2471 2472 return self.expression( 2473 exp.Subquery, 2474 this=this, 2475 pivots=self._parse_pivots(), 2476 alias=self._parse_table_alias() if parse_alias else None, 2477 ) 2478 2479 def _implicit_unnests_to_explicit(self, this: E) -> E: 2480 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2481 2482 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2483 for i, join in enumerate(this.args.get("joins") or []): 2484 table = join.this 2485 normalized_table = table.copy() 2486 normalized_table.meta["maybe_column"] = True 2487 normalized_table = _norm(normalized_table, dialect=self.dialect) 2488 2489 if isinstance(table, exp.Table) and not join.args.get("on"): 2490 if normalized_table.parts[0].name in refs: 2491 table_as_column = table.to_column() 2492 unnest = exp.Unnest(expressions=[table_as_column]) 2493 2494 # Table.to_column creates a parent Alias node that we want to convert to 2495 # a TableAlias and attach to the Unnest, so it matches the parser's output 2496 if isinstance(table.args.get("alias"), exp.TableAlias): 2497 table_as_column.replace(table_as_column.this) 2498 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2499 2500 table.replace(unnest) 2501 2502 refs.add(normalized_table.alias_or_name) 2503 2504 return this 2505 2506 def _parse_query_modifiers( 2507 self, this: t.Optional[exp.Expression] 2508 ) -> t.Optional[exp.Expression]: 2509 if isinstance(this, (exp.Query, exp.Table)): 2510 for join in iter(self._parse_join, None): 2511 this.append("joins", join) 2512 for lateral in iter(self._parse_lateral, None): 2513 this.append("laterals", lateral) 2514 2515 while True: 2516 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2517 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2518 key, expression = parser(self) 2519 2520 if expression: 2521 this.set(key, expression) 2522 if key == "limit": 2523 offset = expression.args.pop("offset", None) 2524 2525 if offset: 2526 offset = exp.Offset(expression=offset) 2527 this.set("offset", offset) 2528 2529 limit_by_expressions = expression.expressions 2530 expression.set("expressions", 
None) 2531 offset.set("expressions", limit_by_expressions) 2532 continue 2533 break 2534 2535 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2536 this = self._implicit_unnests_to_explicit(this) 2537 2538 return this 2539 2540 def _parse_hint(self) -> t.Optional[exp.Hint]: 2541 if self._match(TokenType.HINT): 2542 hints = [] 2543 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2544 hints.extend(hint) 2545 2546 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2547 self.raise_error("Expected */ after HINT") 2548 2549 return self.expression(exp.Hint, expressions=hints) 2550 2551 return None 2552 2553 def _parse_into(self) -> t.Optional[exp.Into]: 2554 if not self._match(TokenType.INTO): 2555 return None 2556 2557 temp = self._match(TokenType.TEMPORARY) 2558 unlogged = self._match_text_seq("UNLOGGED") 2559 self._match(TokenType.TABLE) 2560 2561 return self.expression( 2562 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2563 ) 2564 2565 def _parse_from( 2566 self, joins: bool = False, skip_from_token: bool = False 2567 ) -> t.Optional[exp.From]: 2568 if not skip_from_token and not self._match(TokenType.FROM): 2569 return None 2570 2571 return self.expression( 2572 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2573 ) 2574 2575 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2576 if not self._match(TokenType.MATCH_RECOGNIZE): 2577 return None 2578 2579 self._match_l_paren() 2580 2581 partition = self._parse_partition_by() 2582 order = self._parse_order() 2583 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2584 2585 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2586 rows = exp.var("ONE ROW PER MATCH") 2587 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2588 text = "ALL ROWS PER MATCH" 2589 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2590 text += " SHOW EMPTY MATCHES" 2591 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2592 text += " OMIT EMPTY MATCHES" 2593 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2594 text += " WITH UNMATCHED ROWS" 2595 rows = exp.var(text) 2596 else: 2597 rows = None 2598 2599 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2600 text = "AFTER MATCH SKIP" 2601 if self._match_text_seq("PAST", "LAST", "ROW"): 2602 text += " PAST LAST ROW" 2603 elif self._match_text_seq("TO", "NEXT", "ROW"): 2604 text += " TO NEXT ROW" 2605 elif self._match_text_seq("TO", "FIRST"): 2606 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2607 elif self._match_text_seq("TO", "LAST"): 2608 text += f" TO LAST {self._advance_any().text}" # type: ignore 2609 after = exp.var(text) 2610 else: 2611 after = None 2612 2613 if self._match_text_seq("PATTERN"): 2614 self._match_l_paren() 2615 2616 if not self._curr: 2617 self.raise_error("Expecting )", self._curr) 2618 2619 paren = 1 2620 start = self._curr 2621 2622 while self._curr and paren > 0: 2623 if self._curr.token_type == TokenType.L_PAREN: 2624 paren += 1 2625 if self._curr.token_type == TokenType.R_PAREN: 2626 paren -= 1 2627 2628 end = self._prev 2629 self._advance() 2630 2631 if paren > 0: 2632 self.raise_error("Expecting )", self._curr) 2633 2634 pattern = exp.var(self._find_sql(start, end)) 2635 else: 2636 pattern = None 2637 2638 define = ( 2639 self._parse_csv(self._parse_name_as_expression) 2640 if self._match_text_seq("DEFINE") 2641 else None 2642 ) 2643 2644 self._match_r_paren() 2645 2646 return 
self.expression( 2647 exp.MatchRecognize, 2648 partition_by=partition, 2649 order=order, 2650 measures=measures, 2651 rows=rows, 2652 after=after, 2653 pattern=pattern, 2654 define=define, 2655 alias=self._parse_table_alias(), 2656 ) 2657 2658 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2659 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2660 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2661 cross_apply = False 2662 2663 if cross_apply is not None: 2664 this = self._parse_select(table=True) 2665 view = None 2666 outer = None 2667 elif self._match(TokenType.LATERAL): 2668 this = self._parse_select(table=True) 2669 view = self._match(TokenType.VIEW) 2670 outer = self._match(TokenType.OUTER) 2671 else: 2672 return None 2673 2674 if not this: 2675 this = ( 2676 self._parse_unnest() 2677 or self._parse_function() 2678 or self._parse_id_var(any_token=False) 2679 ) 2680 2681 while self._match(TokenType.DOT): 2682 this = exp.Dot( 2683 this=this, 2684 expression=self._parse_function() or self._parse_id_var(any_token=False), 2685 ) 2686 2687 if view: 2688 table = self._parse_id_var(any_token=False) 2689 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2690 table_alias: t.Optional[exp.TableAlias] = self.expression( 2691 exp.TableAlias, this=table, columns=columns 2692 ) 2693 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2694 # We move the alias from the lateral's child node to the lateral itself 2695 table_alias = this.args["alias"].pop() 2696 else: 2697 table_alias = self._parse_table_alias() 2698 2699 return self.expression( 2700 exp.Lateral, 2701 this=this, 2702 view=view, 2703 outer=outer, 2704 alias=table_alias, 2705 cross_apply=cross_apply, 2706 ) 2707 2708 def _parse_join_parts( 2709 self, 2710 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2711 return ( 2712 self._match_set(self.JOIN_METHODS) and self._prev, 2713 self._match_set(self.JOIN_SIDES) and self._prev, 2714 self._match_set(self.JOIN_KINDS) and self._prev, 2715 ) 2716 2717 def _parse_join( 2718 self, skip_join_token: bool = False, parse_bracket: bool = False 2719 ) -> t.Optional[exp.Join]: 2720 if self._match(TokenType.COMMA): 2721 return self.expression(exp.Join, this=self._parse_table()) 2722 2723 index = self._index 2724 method, side, kind = self._parse_join_parts() 2725 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2726 join = self._match(TokenType.JOIN) 2727 2728 if not skip_join_token and not join: 2729 self._retreat(index) 2730 kind = None 2731 method = None 2732 side = None 2733 2734 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2735 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2736 2737 if not skip_join_token and not join and not outer_apply and not cross_apply: 2738 return None 2739 2740 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2741 2742 if method: 2743 kwargs["method"] = method.text 2744 if side: 2745 kwargs["side"] = side.text 2746 if kind: 2747 kwargs["kind"] = kind.text 2748 if hint: 2749 kwargs["hint"] = hint 2750 2751 if self._match(TokenType.ON): 2752 kwargs["on"] = self._parse_conjunction() 2753 elif self._match(TokenType.USING): 2754 kwargs["using"] = self._parse_wrapped_id_vars() 2755 elif not (kind and kind.token_type == TokenType.CROSS): 2756 index = self._index 2757 join = self._parse_join() 2758 2759 if join and self._match(TokenType.ON): 2760 
kwargs["on"] = self._parse_conjunction() 2761 elif join and self._match(TokenType.USING): 2762 kwargs["using"] = self._parse_wrapped_id_vars() 2763 else: 2764 join = None 2765 self._retreat(index) 2766 2767 kwargs["this"].set("joins", [join] if join else None) 2768 2769 comments = [c for token in (method, side, kind) if token for c in token.comments] 2770 return self.expression(exp.Join, comments=comments, **kwargs) 2771 2772 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2773 this = self._parse_conjunction() 2774 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2775 return this 2776 2777 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2778 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2779 2780 return this 2781 2782 def _parse_index( 2783 self, 2784 index: t.Optional[exp.Expression] = None, 2785 ) -> t.Optional[exp.Index]: 2786 if index: 2787 unique = None 2788 primary = None 2789 amp = None 2790 2791 self._match(TokenType.ON) 2792 self._match(TokenType.TABLE) # hive 2793 table = self._parse_table_parts(schema=True) 2794 else: 2795 unique = self._match(TokenType.UNIQUE) 2796 primary = self._match_text_seq("PRIMARY") 2797 amp = self._match_text_seq("AMP") 2798 2799 if not self._match(TokenType.INDEX): 2800 return None 2801 2802 index = self._parse_id_var() 2803 table = None 2804 2805 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2806 2807 if self._match(TokenType.L_PAREN, advance=False): 2808 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2809 else: 2810 columns = None 2811 2812 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2813 2814 return self.expression( 2815 exp.Index, 2816 this=index, 2817 table=table, 2818 using=using, 2819 columns=columns, 2820 unique=unique, 2821 primary=primary, 2822 amp=amp, 2823 include=include, 2824 partition_by=self._parse_partition_by(), 2825 where=self._parse_where(), 2826 ) 2827 2828 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2829 hints: t.List[exp.Expression] = [] 2830 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2831 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2832 hints.append( 2833 self.expression( 2834 exp.WithTableHint, 2835 expressions=self._parse_csv( 2836 lambda: self._parse_function() or self._parse_var(any_token=True) 2837 ), 2838 ) 2839 ) 2840 self._match_r_paren() 2841 else: 2842 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2843 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2844 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2845 2846 self._match_texts(("INDEX", "KEY")) 2847 if self._match(TokenType.FOR): 2848 hint.set("target", self._advance_any() and self._prev.text.upper()) 2849 2850 hint.set("expressions", self._parse_wrapped_id_vars()) 2851 hints.append(hint) 2852 2853 return hints or None 2854 2855 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2856 return ( 2857 (not schema and self._parse_function(optional_parens=False)) 2858 or self._parse_id_var(any_token=False) 2859 or self._parse_string_as_identifier() 2860 or self._parse_placeholder() 2861 ) 2862 2863 def _parse_table_parts( 2864 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 2865 ) -> exp.Table: 2866 catalog = None 2867 db = None 2868 table: t.Optional[exp.Expression | str] = 
self._parse_table_part(schema=schema) 2869 2870 while self._match(TokenType.DOT): 2871 if catalog: 2872 # This allows nesting the table in arbitrarily many dot expressions if needed 2873 table = self.expression( 2874 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2875 ) 2876 else: 2877 catalog = db 2878 db = table 2879 # "" used for tsql FROM a..b case 2880 table = self._parse_table_part(schema=schema) or "" 2881 2882 if ( 2883 wildcard 2884 and self._is_connected() 2885 and (isinstance(table, exp.Identifier) or not table) 2886 and self._match(TokenType.STAR) 2887 ): 2888 if isinstance(table, exp.Identifier): 2889 table.args["this"] += "*" 2890 else: 2891 table = exp.Identifier(this="*") 2892 2893 if is_db_reference: 2894 catalog = db 2895 db = table 2896 table = None 2897 2898 if not table and not is_db_reference: 2899 self.raise_error(f"Expected table name but got {self._curr}") 2900 if not db and is_db_reference: 2901 self.raise_error(f"Expected database name but got {self._curr}") 2902 2903 return self.expression( 2904 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2905 ) 2906 2907 def _parse_table( 2908 self, 2909 schema: bool = False, 2910 joins: bool = False, 2911 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2912 parse_bracket: bool = False, 2913 is_db_reference: bool = False, 2914 ) -> t.Optional[exp.Expression]: 2915 lateral = self._parse_lateral() 2916 if lateral: 2917 return lateral 2918 2919 unnest = self._parse_unnest() 2920 if unnest: 2921 return unnest 2922 2923 values = self._parse_derived_table_values() 2924 if values: 2925 return values 2926 2927 subquery = self._parse_select(table=True) 2928 if subquery: 2929 if not subquery.args.get("pivots"): 2930 subquery.set("pivots", self._parse_pivots()) 2931 return subquery 2932 2933 bracket = parse_bracket and self._parse_bracket(None) 2934 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2935 2936 only = self._match(TokenType.ONLY) 2937 2938 this = t.cast( 2939 exp.Expression, 2940 bracket 2941 or self._parse_bracket( 2942 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 2943 ), 2944 ) 2945 2946 if only: 2947 this.set("only", only) 2948 2949 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 2950 self._match_text_seq("*") 2951 2952 if schema: 2953 return self._parse_schema(this=this) 2954 2955 version = self._parse_version() 2956 2957 if version: 2958 this.set("version", version) 2959 2960 if self.dialect.ALIAS_POST_TABLESAMPLE: 2961 table_sample = self._parse_table_sample() 2962 2963 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2964 if alias: 2965 this.set("alias", alias) 2966 2967 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2968 return self.expression( 2969 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2970 ) 2971 2972 this.set("hints", self._parse_table_hints()) 2973 2974 if not this.args.get("pivots"): 2975 this.set("pivots", self._parse_pivots()) 2976 2977 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2978 table_sample = self._parse_table_sample() 2979 2980 if table_sample: 2981 table_sample.set("this", this) 2982 this = table_sample 2983 2984 if joins: 2985 for join in iter(self._parse_join, None): 2986 this.append("joins", join) 2987 2988 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2989 this.set("ordinality", True) 2990 this.set("alias", self._parse_table_alias()) 2991 2992 
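        # the WITH ORDINALITY branch above covers e.g. Postgres'
        # FROM generate_series(1, 3) WITH ORDINALITY AS t(i, n)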
return this 2993 2994 def _parse_version(self) -> t.Optional[exp.Version]: 2995 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2996 this = "TIMESTAMP" 2997 elif self._match(TokenType.VERSION_SNAPSHOT): 2998 this = "VERSION" 2999 else: 3000 return None 3001 3002 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3003 kind = self._prev.text.upper() 3004 start = self._parse_bitwise() 3005 self._match_texts(("TO", "AND")) 3006 end = self._parse_bitwise() 3007 expression: t.Optional[exp.Expression] = self.expression( 3008 exp.Tuple, expressions=[start, end] 3009 ) 3010 elif self._match_text_seq("CONTAINED", "IN"): 3011 kind = "CONTAINED IN" 3012 expression = self.expression( 3013 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3014 ) 3015 elif self._match(TokenType.ALL): 3016 kind = "ALL" 3017 expression = None 3018 else: 3019 self._match_text_seq("AS", "OF") 3020 kind = "AS OF" 3021 expression = self._parse_type() 3022 3023 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3024 3025 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3026 if not self._match(TokenType.UNNEST): 3027 return None 3028 3029 expressions = self._parse_wrapped_csv(self._parse_equality) 3030 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3031 3032 alias = self._parse_table_alias() if with_alias else None 3033 3034 if alias: 3035 if self.dialect.UNNEST_COLUMN_ONLY: 3036 if alias.args.get("columns"): 3037 self.raise_error("Unexpected extra column alias in unnest.") 3038 3039 alias.set("columns", [alias.this]) 3040 alias.set("this", None) 3041 3042 columns = alias.args.get("columns") or [] 3043 if offset and len(expressions) < len(columns): 3044 offset = columns.pop() 3045 3046 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3047 self._match(TokenType.ALIAS) 3048 offset = self._parse_id_var( 3049 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3050 ) or exp.to_identifier("offset") 3051 3052 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3053 3054 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3055 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3056 if not is_derived and not self._match_text_seq("VALUES"): 3057 return None 3058 3059 expressions = self._parse_csv(self._parse_value) 3060 alias = self._parse_table_alias() 3061 3062 if is_derived: 3063 self._match_r_paren() 3064 3065 return self.expression( 3066 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3067 ) 3068 3069 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3070 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3071 as_modifier and self._match_text_seq("USING", "SAMPLE") 3072 ): 3073 return None 3074 3075 bucket_numerator = None 3076 bucket_denominator = None 3077 bucket_field = None 3078 percent = None 3079 size = None 3080 seed = None 3081 3082 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3083 matched_l_paren = self._match(TokenType.L_PAREN) 3084 3085 if self.TABLESAMPLE_CSV: 3086 num = None 3087 expressions = self._parse_csv(self._parse_primary) 3088 else: 3089 expressions = None 3090 num = ( 3091 self._parse_factor() 3092 if self._match(TokenType.NUMBER, advance=False) 3093 else self._parse_primary() or self._parse_placeholder() 3094 ) 3095 3096 if self._match_text_seq("BUCKET"): 3097 bucket_numerator = self._parse_number() 3098 self._match_text_seq("OUT", "OF") 
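            # hive bucket sampling, e.g. TABLESAMPLE (BUCKET 3 OUT OF 16 ON id)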
3099 bucket_denominator = self._parse_number() 3100 self._match(TokenType.ON) 3101 bucket_field = self._parse_field() 3102 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3103 percent = num 3104 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3105 size = num 3106 else: 3107 percent = num 3108 3109 if matched_l_paren: 3110 self._match_r_paren() 3111 3112 if self._match(TokenType.L_PAREN): 3113 method = self._parse_var(upper=True) 3114 seed = self._match(TokenType.COMMA) and self._parse_number() 3115 self._match_r_paren() 3116 elif self._match_texts(("SEED", "REPEATABLE")): 3117 seed = self._parse_wrapped(self._parse_number) 3118 3119 return self.expression( 3120 exp.TableSample, 3121 expressions=expressions, 3122 method=method, 3123 bucket_numerator=bucket_numerator, 3124 bucket_denominator=bucket_denominator, 3125 bucket_field=bucket_field, 3126 percent=percent, 3127 size=size, 3128 seed=seed, 3129 ) 3130 3131 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3132 return list(iter(self._parse_pivot, None)) or None 3133 3134 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 3135 return list(iter(self._parse_join, None)) or None 3136 3137 # https://duckdb.org/docs/sql/statements/pivot 3138 def _parse_simplified_pivot(self) -> exp.Pivot: 3139 def _parse_on() -> t.Optional[exp.Expression]: 3140 this = self._parse_bitwise() 3141 return self._parse_in(this) if self._match(TokenType.IN) else this 3142 3143 this = self._parse_table() 3144 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3145 using = self._match(TokenType.USING) and self._parse_csv( 3146 lambda: self._parse_alias(self._parse_function()) 3147 ) 3148 group = self._parse_group() 3149 return self.expression( 3150 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3151 ) 3152 3153 def _parse_pivot_in(self) -> exp.In: 3154 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3155 this = self._parse_conjunction() 3156 3157 self._match(TokenType.ALIAS) 3158 alias = self._parse_field() 3159 if alias: 3160 return self.expression(exp.PivotAlias, this=this, alias=alias) 3161 3162 return this 3163 3164 value = self._parse_column() 3165 3166 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3167 self.raise_error("Expecting IN (") 3168 3169 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3170 3171 self._match_r_paren() 3172 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3173 3174 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3175 index = self._index 3176 include_nulls = None 3177 3178 if self._match(TokenType.PIVOT): 3179 unpivot = False 3180 elif self._match(TokenType.UNPIVOT): 3181 unpivot = True 3182 3183 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3184 if self._match_text_seq("INCLUDE", "NULLS"): 3185 include_nulls = True 3186 elif self._match_text_seq("EXCLUDE", "NULLS"): 3187 include_nulls = False 3188 else: 3189 return None 3190 3191 expressions = [] 3192 3193 if not self._match(TokenType.L_PAREN): 3194 self._retreat(index) 3195 return None 3196 3197 if unpivot: 3198 expressions = self._parse_csv(self._parse_column) 3199 else: 3200 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3201 3202 if not expressions: 3203 self.raise_error("Failed to parse PIVOT's aggregation list") 3204 3205 if not self._match(TokenType.FOR): 3206 self.raise_error("Expecting FOR") 3207 3208
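        # e.g. Snowflake / T-SQL: PIVOT (SUM(amount) FOR month IN ('JAN', 'FEB'))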
field = self._parse_pivot_in() 3209 3210 self._match_r_paren() 3211 3212 pivot = self.expression( 3213 exp.Pivot, 3214 expressions=expressions, 3215 field=field, 3216 unpivot=unpivot, 3217 include_nulls=include_nulls, 3218 ) 3219 3220 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3221 pivot.set("alias", self._parse_table_alias()) 3222 3223 if not unpivot: 3224 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3225 3226 columns: t.List[exp.Expression] = [] 3227 for fld in pivot.args["field"].expressions: 3228 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3229 for name in names: 3230 if self.PREFIXED_PIVOT_COLUMNS: 3231 name = f"{name}_{field_name}" if name else field_name 3232 else: 3233 name = f"{field_name}_{name}" if name else field_name 3234 3235 columns.append(exp.to_identifier(name)) 3236 3237 pivot.set("columns", columns) 3238 3239 return pivot 3240 3241 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3242 return [agg.alias for agg in aggregations] 3243 3244 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3245 if not skip_where_token and not self._match(TokenType.PREWHERE): 3246 return None 3247 3248 return self.expression( 3249 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3250 ) 3251 3252 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3253 if not skip_where_token and not self._match(TokenType.WHERE): 3254 return None 3255 3256 return self.expression( 3257 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3258 ) 3259 3260 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3261 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3262 return None 3263 3264 elements = defaultdict(list) 3265 3266 if self._match(TokenType.ALL): 3267 return self.expression(exp.Group, all=True) 3268 3269 while True: 3270 expressions = self._parse_csv(self._parse_conjunction) 3271 if expressions: 3272 elements["expressions"].extend(expressions) 3273 3274 grouping_sets = self._parse_grouping_sets() 3275 if grouping_sets: 3276 elements["grouping_sets"].extend(grouping_sets) 3277 3278 rollup = None 3279 cube = None 3280 totals = None 3281 3282 index = self._index 3283 with_ = self._match(TokenType.WITH) 3284 if self._match(TokenType.ROLLUP): 3285 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3286 elements["rollup"].extend(ensure_list(rollup)) 3287 3288 if self._match(TokenType.CUBE): 3289 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3290 elements["cube"].extend(ensure_list(cube)) 3291 3292 if self._match_text_seq("TOTALS"): 3293 totals = True 3294 elements["totals"] = True # type: ignore 3295 3296 if not (grouping_sets or rollup or cube or totals): 3297 if with_: 3298 self._retreat(index) 3299 break 3300 3301 return self.expression(exp.Group, **elements) # type: ignore 3302 3303 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3304 if not self._match(TokenType.GROUPING_SETS): 3305 return None 3306 3307 return self._parse_wrapped_csv(self._parse_grouping_set) 3308 3309 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3310 if self._match(TokenType.L_PAREN): 3311 grouping_set = self._parse_csv(self._parse_column) 3312 self._match_r_paren() 3313 return self.expression(exp.Tuple, expressions=grouping_set) 3314 3315 return self._parse_column() 3316 3317 def 
_parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3318 if not skip_having_token and not self._match(TokenType.HAVING): 3319 return None 3320 return self.expression(exp.Having, this=self._parse_conjunction()) 3321 3322 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3323 if not self._match(TokenType.QUALIFY): 3324 return None 3325 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3326 3327 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3328 if skip_start_token: 3329 start = None 3330 elif self._match(TokenType.START_WITH): 3331 start = self._parse_conjunction() 3332 else: 3333 return None 3334 3335 self._match(TokenType.CONNECT_BY) 3336 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3337 exp.Prior, this=self._parse_bitwise() 3338 ) 3339 connect = self._parse_conjunction() 3340 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3341 3342 if not start and self._match(TokenType.START_WITH): 3343 start = self._parse_conjunction() 3344 3345 return self.expression(exp.Connect, start=start, connect=connect) 3346 3347 def _parse_name_as_expression(self) -> exp.Alias: 3348 return self.expression( 3349 exp.Alias, 3350 alias=self._parse_id_var(any_token=True), 3351 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3352 ) 3353 3354 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3355 if self._match_text_seq("INTERPOLATE"): 3356 return self._parse_wrapped_csv(self._parse_name_as_expression) 3357 return None 3358 3359 def _parse_order( 3360 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3361 ) -> t.Optional[exp.Expression]: 3362 siblings = None 3363 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3364 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3365 return this 3366 3367 siblings = True 3368 3369 return self.expression( 3370 exp.Order, 3371 this=this, 3372 expressions=self._parse_csv(self._parse_ordered), 3373 interpolate=self._parse_interpolate(), 3374 siblings=siblings, 3375 ) 3376 3377 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3378 if not self._match(token): 3379 return None 3380 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3381 3382 def _parse_ordered( 3383 self, parse_method: t.Optional[t.Callable] = None 3384 ) -> t.Optional[exp.Ordered]: 3385 this = parse_method() if parse_method else self._parse_conjunction() 3386 if not this: 3387 return None 3388 3389 asc = self._match(TokenType.ASC) 3390 desc = self._match(TokenType.DESC) or (asc and False) 3391 3392 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3393 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3394 3395 nulls_first = is_nulls_first or False 3396 explicitly_null_ordered = is_nulls_first or is_nulls_last 3397 3398 if ( 3399 not explicitly_null_ordered 3400 and ( 3401 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3402 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3403 ) 3404 and self.dialect.NULL_ORDERING != "nulls_are_last" 3405 ): 3406 nulls_first = True 3407 3408 if self._match_text_seq("WITH", "FILL"): 3409 with_fill = self.expression( 3410 exp.WithFill, 3411 **{ # type: ignore 3412 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3413 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3414 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3415 }, 3416 ) 3417 else: 3418 with_fill = 
None 3419 3420 return self.expression( 3421 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3422 ) 3423 3424 def _parse_limit( 3425 self, this: t.Optional[exp.Expression] = None, top: bool = False 3426 ) -> t.Optional[exp.Expression]: 3427 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3428 comments = self._prev_comments 3429 if top: 3430 limit_paren = self._match(TokenType.L_PAREN) 3431 expression = self._parse_term() if limit_paren else self._parse_number() 3432 3433 if limit_paren: 3434 self._match_r_paren() 3435 else: 3436 expression = self._parse_term() 3437 3438 if self._match(TokenType.COMMA): 3439 offset = expression 3440 expression = self._parse_term() 3441 else: 3442 offset = None 3443 3444 limit_exp = self.expression( 3445 exp.Limit, 3446 this=this, 3447 expression=expression, 3448 offset=offset, 3449 comments=comments, 3450 expressions=self._parse_limit_by(), 3451 ) 3452 3453 return limit_exp 3454 3455 if self._match(TokenType.FETCH): 3456 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3457 direction = self._prev.text.upper() if direction else "FIRST" 3458 3459 count = self._parse_field(tokens=self.FETCH_TOKENS) 3460 percent = self._match(TokenType.PERCENT) 3461 3462 self._match_set((TokenType.ROW, TokenType.ROWS)) 3463 3464 only = self._match_text_seq("ONLY") 3465 with_ties = self._match_text_seq("WITH", "TIES") 3466 3467 if only and with_ties: 3468 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3469 3470 return self.expression( 3471 exp.Fetch, 3472 direction=direction, 3473 count=count, 3474 percent=percent, 3475 with_ties=with_ties, 3476 ) 3477 3478 return this 3479 3480 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3481 if not self._match(TokenType.OFFSET): 3482 return this 3483 3484 count = self._parse_term() 3485 self._match_set((TokenType.ROW, TokenType.ROWS)) 3486 3487 return self.expression( 3488 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3489 ) 3490 3491 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3492 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3493 3494 def _parse_locks(self) -> t.List[exp.Lock]: 3495 locks = [] 3496 while True: 3497 if self._match_text_seq("FOR", "UPDATE"): 3498 update = True 3499 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3500 "LOCK", "IN", "SHARE", "MODE" 3501 ): 3502 update = False 3503 else: 3504 break 3505 3506 expressions = None 3507 if self._match_text_seq("OF"): 3508 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3509 3510 wait: t.Optional[bool | exp.Expression] = None 3511 if self._match_text_seq("NOWAIT"): 3512 wait = True 3513 elif self._match_text_seq("WAIT"): 3514 wait = self._parse_primary() 3515 elif self._match_text_seq("SKIP", "LOCKED"): 3516 wait = False 3517 3518 locks.append( 3519 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3520 ) 3521 3522 return locks 3523 3524 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3525 while this and self._match_set(self.SET_OPERATIONS): 3526 token_type = self._prev.token_type 3527 3528 if token_type == TokenType.UNION: 3529 operation = exp.Union 3530 elif token_type == TokenType.EXCEPT: 3531 operation = exp.Except 3532 else: 3533 operation = exp.Intersect 3534 3535 comments = self._prev.comments 3536 distinct = self._match(TokenType.DISTINCT) or not 
self._match(TokenType.ALL) 3537 by_name = self._match_text_seq("BY", "NAME") 3538 expression = self._parse_select(nested=True, parse_set_operation=False) 3539 3540 this = self.expression( 3541 operation, 3542 comments=comments, 3543 this=this, 3544 distinct=distinct, 3545 by_name=by_name, 3546 expression=expression, 3547 ) 3548 3549 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3550 expression = this.expression 3551 3552 if expression: 3553 for arg in self.UNION_MODIFIERS: 3554 expr = expression.args.get(arg) 3555 if expr: 3556 this.set(arg, expr.pop()) 3557 3558 return this 3559 3560 def _parse_expression(self) -> t.Optional[exp.Expression]: 3561 return self._parse_alias(self._parse_conjunction()) 3562 3563 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3564 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3565 3566 def _parse_equality(self) -> t.Optional[exp.Expression]: 3567 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3568 3569 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3570 return self._parse_tokens(self._parse_range, self.COMPARISON) 3571 3572 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3573 this = this or self._parse_bitwise() 3574 negate = self._match(TokenType.NOT) 3575 3576 if self._match_set(self.RANGE_PARSERS): 3577 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3578 if not expression: 3579 return this 3580 3581 this = expression 3582 elif self._match(TokenType.ISNULL): 3583 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3584 3585 # Postgres supports ISNULL and NOTNULL for conditions. 3586 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3587 if self._match(TokenType.NOTNULL): 3588 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3589 this = self.expression(exp.Not, this=this) 3590 3591 if negate: 3592 this = self.expression(exp.Not, this=this) 3593 3594 if self._match(TokenType.IS): 3595 this = self._parse_is(this) 3596 3597 return this 3598 3599 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3600 index = self._index - 1 3601 negate = self._match(TokenType.NOT) 3602 3603 if self._match_text_seq("DISTINCT", "FROM"): 3604 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3605 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3606 3607 expression = self._parse_null() or self._parse_boolean() 3608 if not expression: 3609 self._retreat(index) 3610 return None 3611 3612 this = self.expression(exp.Is, this=this, expression=expression) 3613 return self.expression(exp.Not, this=this) if negate else this 3614 3615 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3616 unnest = self._parse_unnest(with_alias=False) 3617 if unnest: 3618 this = self.expression(exp.In, this=this, unnest=unnest) 3619 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3620 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3621 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3622 3623 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3624 this = self.expression(exp.In, this=this, query=expressions[0]) 3625 else: 3626 this = self.expression(exp.In, this=this, expressions=expressions) 3627 3628 if matched_l_paren: 3629 self._match_r_paren(this) 3630 elif not self._match(TokenType.R_BRACKET, expression=this): 3631 
self.raise_error("Expecting ]") 3632 else: 3633 this = self.expression(exp.In, this=this, field=self._parse_field()) 3634 3635 return this 3636 3637 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3638 low = self._parse_bitwise() 3639 self._match(TokenType.AND) 3640 high = self._parse_bitwise() 3641 return self.expression(exp.Between, this=this, low=low, high=high) 3642 3643 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3644 if not self._match(TokenType.ESCAPE): 3645 return this 3646 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3647 3648 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3649 index = self._index 3650 3651 if not self._match(TokenType.INTERVAL) and match_interval: 3652 return None 3653 3654 if self._match(TokenType.STRING, advance=False): 3655 this = self._parse_primary() 3656 else: 3657 this = self._parse_term() 3658 3659 if not this or ( 3660 isinstance(this, exp.Column) 3661 and not this.table 3662 and not this.this.quoted 3663 and this.name.upper() == "IS" 3664 ): 3665 self._retreat(index) 3666 return None 3667 3668 unit = self._parse_function() or ( 3669 not self._match(TokenType.ALIAS, advance=False) 3670 and self._parse_var(any_token=True, upper=True) 3671 ) 3672 3673 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3674 # each INTERVAL expression into this canonical form so it's easy to transpile 3675 if this and this.is_number: 3676 this = exp.Literal.string(this.name) 3677 elif this and this.is_string: 3678 parts = this.name.split() 3679 3680 if len(parts) == 2: 3681 if unit: 3682 # This is not actually a unit, it's something else (e.g. a "window side") 3683 unit = None 3684 self._retreat(self._index - 1) 3685 3686 this = exp.Literal.string(parts[0]) 3687 unit = self.expression(exp.Var, this=parts[1].upper()) 3688 3689 return self.expression(exp.Interval, this=this, unit=unit) 3690 3691 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3692 this = self._parse_term() 3693 3694 while True: 3695 if self._match_set(self.BITWISE): 3696 this = self.expression( 3697 self.BITWISE[self._prev.token_type], 3698 this=this, 3699 expression=self._parse_term(), 3700 ) 3701 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3702 this = self.expression( 3703 exp.DPipe, 3704 this=this, 3705 expression=self._parse_term(), 3706 safe=not self.dialect.STRICT_STRING_CONCAT, 3707 ) 3708 elif self._match(TokenType.DQMARK): 3709 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3710 elif self._match_pair(TokenType.LT, TokenType.LT): 3711 this = self.expression( 3712 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3713 ) 3714 elif self._match_pair(TokenType.GT, TokenType.GT): 3715 this = self.expression( 3716 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3717 ) 3718 else: 3719 break 3720 3721 return this 3722 3723 def _parse_term(self) -> t.Optional[exp.Expression]: 3724 return self._parse_tokens(self._parse_factor, self.TERM) 3725 3726 def _parse_factor(self) -> t.Optional[exp.Expression]: 3727 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3728 this = parse_method() 3729 3730 while self._match_set(self.FACTOR): 3731 this = self.expression( 3732 self.FACTOR[self._prev.token_type], 3733 this=this, 3734 comments=self._prev_comments, 3735 expression=parse_method(), 3736 ) 3737 if isinstance(this, exp.Div): 
3738 this.args["typed"] = self.dialect.TYPED_DIVISION 3739 this.args["safe"] = self.dialect.SAFE_DIVISION 3740 3741 return this 3742 3743 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3744 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3745 3746 def _parse_unary(self) -> t.Optional[exp.Expression]: 3747 if self._match_set(self.UNARY_PARSERS): 3748 return self.UNARY_PARSERS[self._prev.token_type](self) 3749 return self._parse_at_time_zone(self._parse_type()) 3750 3751 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3752 interval = parse_interval and self._parse_interval() 3753 if interval: 3754 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 3755 while True: 3756 index = self._index 3757 self._match(TokenType.PLUS) 3758 3759 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3760 self._retreat(index) 3761 break 3762 3763 interval = self.expression( # type: ignore 3764 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3765 ) 3766 3767 return interval 3768 3769 index = self._index 3770 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3771 this = self._parse_column() 3772 3773 if data_type: 3774 if isinstance(this, exp.Literal): 3775 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3776 if parser: 3777 return parser(self, this, data_type) 3778 return self.expression(exp.Cast, this=this, to=data_type) 3779 if not data_type.expressions: 3780 self._retreat(index) 3781 return self._parse_column() 3782 return self._parse_column_ops(data_type) 3783 3784 return this and self._parse_column_ops(this) 3785 3786 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3787 this = self._parse_type() 3788 if not this: 3789 return None 3790 3791 return self.expression( 3792 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3793 ) 3794 3795 def _parse_types( 3796 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3797 ) -> t.Optional[exp.Expression]: 3798 index = self._index 3799 3800 prefix = self._match_text_seq("SYSUDTLIB", ".") 3801 3802 if not self._match_set(self.TYPE_TOKENS): 3803 identifier = allow_identifiers and self._parse_id_var( 3804 any_token=False, tokens=(TokenType.VAR,) 3805 ) 3806 if identifier: 3807 tokens = self.dialect.tokenize(identifier.name) 3808 3809 if len(tokens) != 1: 3810 self.raise_error("Unexpected identifier", self._prev) 3811 3812 if tokens[0].token_type in self.TYPE_TOKENS: 3813 self._prev = tokens[0] 3814 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3815 type_name = identifier.name 3816 3817 while self._match(TokenType.DOT): 3818 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3819 3820 return exp.DataType.build(type_name, udt=True) 3821 else: 3822 self._retreat(self._index - 1) 3823 return None 3824 else: 3825 return None 3826 3827 type_token = self._prev.token_type 3828 3829 if type_token == TokenType.PSEUDO_TYPE: 3830 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3831 3832 if type_token == TokenType.OBJECT_IDENTIFIER: 3833 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3834 3835 nested = type_token in self.NESTED_TYPE_TOKENS 3836 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3837 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3838 expressions = None 3839 maybe_func = False 3840 3841 if self._match(TokenType.L_PAREN): 3842 if is_struct: 3843 
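                # e.g. the member list of STRUCT(a INT, b TEXT); each member may
                # carry its own type and is parsed by _parse_struct_types below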
expressions = self._parse_csv(self._parse_struct_types) 3844 elif nested: 3845 expressions = self._parse_csv( 3846 lambda: self._parse_types( 3847 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3848 ) 3849 ) 3850 elif type_token in self.ENUM_TYPE_TOKENS: 3851 expressions = self._parse_csv(self._parse_equality) 3852 elif is_aggregate: 3853 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 3854 any_token=False, tokens=(TokenType.VAR,) 3855 ) 3856 if not func_or_ident or not self._match(TokenType.COMMA): 3857 return None 3858 expressions = self._parse_csv( 3859 lambda: self._parse_types( 3860 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3861 ) 3862 ) 3863 expressions.insert(0, func_or_ident) 3864 else: 3865 expressions = self._parse_csv(self._parse_type_size) 3866 3867 if not expressions or not self._match(TokenType.R_PAREN): 3868 self._retreat(index) 3869 return None 3870 3871 maybe_func = True 3872 3873 this: t.Optional[exp.Expression] = None 3874 values: t.Optional[t.List[exp.Expression]] = None 3875 3876 if nested and self._match(TokenType.LT): 3877 if is_struct: 3878 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 3879 else: 3880 expressions = self._parse_csv( 3881 lambda: self._parse_types( 3882 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3883 ) 3884 ) 3885 3886 if not self._match(TokenType.GT): 3887 self.raise_error("Expecting >") 3888 3889 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3890 values = self._parse_csv(self._parse_conjunction) 3891 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3892 3893 if type_token in self.TIMESTAMPS: 3894 if self._match_text_seq("WITH", "TIME", "ZONE"): 3895 maybe_func = False 3896 tz_type = ( 3897 exp.DataType.Type.TIMETZ 3898 if type_token in self.TIMES 3899 else exp.DataType.Type.TIMESTAMPTZ 3900 ) 3901 this = exp.DataType(this=tz_type, expressions=expressions) 3902 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3903 maybe_func = False 3904 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3905 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3906 maybe_func = False 3907 elif type_token == TokenType.INTERVAL: 3908 unit = self._parse_var() 3909 3910 if self._match_text_seq("TO"): 3911 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3912 else: 3913 span = None 3914 3915 if span or not unit: 3916 this = self.expression( 3917 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3918 ) 3919 else: 3920 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 3921 3922 if maybe_func and check_func: 3923 index2 = self._index 3924 peek = self._parse_string() 3925 3926 if not peek: 3927 self._retreat(index) 3928 return None 3929 3930 self._retreat(index2) 3931 3932 if not this: 3933 if self._match_text_seq("UNSIGNED"): 3934 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3935 if not unsigned_type_token: 3936 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3937 3938 type_token = unsigned_type_token or type_token 3939 3940 this = exp.DataType( 3941 this=exp.DataType.Type[type_token.value], 3942 expressions=expressions, 3943 nested=nested, 3944 values=values, 3945 prefix=prefix, 3946 ) 3947 3948 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3949 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], 
nested=True) 3950 3951 return this 3952 3953 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 3954 index = self._index 3955 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3956 self._match(TokenType.COLON) 3957 column_def = self._parse_column_def(this) 3958 3959 if type_required and ( 3960 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 3961 ): 3962 self._retreat(index) 3963 return self._parse_types() 3964 3965 return column_def 3966 3967 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3968 if not self._match_text_seq("AT", "TIME", "ZONE"): 3969 return this 3970 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3971 3972 def _parse_column(self) -> t.Optional[exp.Expression]: 3973 this = self._parse_column_reference() 3974 return self._parse_column_ops(this) if this else self._parse_bracket(this) 3975 3976 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 3977 this = self._parse_field() 3978 if ( 3979 not this 3980 and self._match(TokenType.VALUES, advance=False) 3981 and self.VALUES_FOLLOWED_BY_PAREN 3982 and (not self._next or self._next.token_type != TokenType.L_PAREN) 3983 ): 3984 this = self._parse_id_var() 3985 3986 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 3987 3988 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3989 this = self._parse_bracket(this) 3990 3991 while self._match_set(self.COLUMN_OPERATORS): 3992 op_token = self._prev.token_type 3993 op = self.COLUMN_OPERATORS.get(op_token) 3994 3995 if op_token == TokenType.DCOLON: 3996 field = self._parse_types() 3997 if not field: 3998 self.raise_error("Expected type") 3999 elif op and self._curr: 4000 field = self._parse_column_reference() 4001 else: 4002 field = self._parse_field(anonymous_func=True, any_token=True) 4003 4004 if isinstance(field, exp.Func): 4005 # bigquery allows function calls like x.y.count(...) 4006 # SAFE.SUBSTR(...) 
4007 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4008 this = self._replace_columns_with_dots(this) 4009 4010 if op: 4011 this = op(self, this, field) 4012 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4013 this = self.expression( 4014 exp.Column, 4015 this=field, 4016 table=this.this, 4017 db=this.args.get("table"), 4018 catalog=this.args.get("db"), 4019 ) 4020 else: 4021 this = self.expression(exp.Dot, this=this, expression=field) 4022 this = self._parse_bracket(this) 4023 return this 4024 4025 def _parse_primary(self) -> t.Optional[exp.Expression]: 4026 if self._match_set(self.PRIMARY_PARSERS): 4027 token_type = self._prev.token_type 4028 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4029 4030 if token_type == TokenType.STRING: 4031 expressions = [primary] 4032 while self._match(TokenType.STRING): 4033 expressions.append(exp.Literal.string(self._prev.text)) 4034 4035 if len(expressions) > 1: 4036 return self.expression(exp.Concat, expressions=expressions) 4037 4038 return primary 4039 4040 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4041 return exp.Literal.number(f"0.{self._prev.text}") 4042 4043 if self._match(TokenType.L_PAREN): 4044 comments = self._prev_comments 4045 query = self._parse_select() 4046 4047 if query: 4048 expressions = [query] 4049 else: 4050 expressions = self._parse_expressions() 4051 4052 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4053 4054 if isinstance(this, exp.UNWRAPPED_QUERIES): 4055 this = self._parse_set_operations( 4056 self._parse_subquery(this=this, parse_alias=False) 4057 ) 4058 elif len(expressions) > 1: 4059 this = self.expression(exp.Tuple, expressions=expressions) 4060 else: 4061 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 4062 4063 if this: 4064 this.add_comments(comments) 4065 4066 self._match_r_paren(expression=this) 4067 return this 4068 4069 return None 4070 4071 def _parse_field( 4072 self, 4073 any_token: bool = False, 4074 tokens: t.Optional[t.Collection[TokenType]] = None, 4075 anonymous_func: bool = False, 4076 ) -> t.Optional[exp.Expression]: 4077 return ( 4078 self._parse_primary() 4079 or self._parse_function(anonymous=anonymous_func) 4080 or self._parse_id_var(any_token=any_token, tokens=tokens) 4081 ) 4082 4083 def _parse_function( 4084 self, 4085 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4086 anonymous: bool = False, 4087 optional_parens: bool = True, 4088 ) -> t.Optional[exp.Expression]: 4089 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4090 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4091 fn_syntax = False 4092 if ( 4093 self._match(TokenType.L_BRACE, advance=False) 4094 and self._next 4095 and self._next.text.upper() == "FN" 4096 ): 4097 self._advance(2) 4098 fn_syntax = True 4099 4100 func = self._parse_function_call( 4101 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4102 ) 4103 4104 if fn_syntax: 4105 self._match(TokenType.R_BRACE) 4106 4107 return func 4108 4109 def _parse_function_call( 4110 self, 4111 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4112 anonymous: bool = False, 4113 optional_parens: bool = True, 4114 ) -> t.Optional[exp.Expression]: 4115 if not self._curr: 4116 return None 4117 4118 comments = self._curr.comments 4119 token_type = self._curr.token_type 4120 this = self._curr.text 4121 upper = this.upper() 4122 4123 parser = 
self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4124 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4125 self._advance() 4126 return parser(self) 4127 4128 if not self._next or self._next.token_type != TokenType.L_PAREN: 4129 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4130 self._advance() 4131 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4132 4133 return None 4134 4135 if token_type not in self.FUNC_TOKENS: 4136 return None 4137 4138 self._advance(2) 4139 4140 parser = self.FUNCTION_PARSERS.get(upper) 4141 if parser and not anonymous: 4142 this = parser(self) 4143 else: 4144 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4145 4146 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4147 this = self.expression(subquery_predicate, this=self._parse_select()) 4148 self._match_r_paren() 4149 return this 4150 4151 if functions is None: 4152 functions = self.FUNCTIONS 4153 4154 function = functions.get(upper) 4155 4156 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4157 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4158 4159 if alias: 4160 args = self._kv_to_prop_eq(args) 4161 4162 if function and not anonymous: 4163 if "dialect" in function.__code__.co_varnames: 4164 func = function(args, dialect=self.dialect) 4165 else: 4166 func = function(args) 4167 4168 func = self.validate_expression(func, args) 4169 if not self.dialect.NORMALIZE_FUNCTIONS: 4170 func.meta["name"] = this 4171 4172 this = func 4173 else: 4174 if token_type == TokenType.IDENTIFIER: 4175 this = exp.Identifier(this=this, quoted=True) 4176 this = self.expression(exp.Anonymous, this=this, expressions=args) 4177 4178 if isinstance(this, exp.Expression): 4179 this.add_comments(comments) 4180 4181 self._match_r_paren(this) 4182 return self._parse_window(this) 4183 4184 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4185 transformed = [] 4186 4187 for e in expressions: 4188 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4189 if isinstance(e, exp.Alias): 4190 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4191 4192 if not isinstance(e, exp.PropertyEQ): 4193 e = self.expression( 4194 exp.PropertyEQ, this=exp.to_identifier(e.name), expression=e.expression 4195 ) 4196 4197 if isinstance(e.this, exp.Column): 4198 e.this.replace(e.this.this) 4199 4200 transformed.append(e) 4201 4202 return transformed 4203 4204 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4205 return self._parse_column_def(self._parse_id_var()) 4206 4207 def _parse_user_defined_function( 4208 self, kind: t.Optional[TokenType] = None 4209 ) -> t.Optional[exp.Expression]: 4210 this = self._parse_id_var() 4211 4212 while self._match(TokenType.DOT): 4213 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4214 4215 if not self._match(TokenType.L_PAREN): 4216 return this 4217 4218 expressions = self._parse_csv(self._parse_function_parameter) 4219 self._match_r_paren() 4220 return self.expression( 4221 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4222 ) 4223 4224 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4225 literal = self._parse_primary() 4226 if literal: 4227 return self.expression(exp.Introducer, this=token.text, expression=literal) 4228 4229 return self.expression(exp.Identifier, this=token.text) 4230 4231 def _parse_session_parameter(self) -> 
exp.SessionParameter: 4232 kind = None 4233 this = self._parse_id_var() or self._parse_primary() 4234 4235 if this and self._match(TokenType.DOT): 4236 kind = this.name 4237 this = self._parse_var() or self._parse_primary() 4238 4239 return self.expression(exp.SessionParameter, this=this, kind=kind) 4240 4241 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4242 index = self._index 4243 4244 if self._match(TokenType.L_PAREN): 4245 expressions = t.cast( 4246 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4247 ) 4248 4249 if not self._match(TokenType.R_PAREN): 4250 self._retreat(index) 4251 else: 4252 expressions = [self._parse_id_var()] 4253 4254 if self._match_set(self.LAMBDAS): 4255 return self.LAMBDAS[self._prev.token_type](self, expressions) 4256 4257 self._retreat(index) 4258 4259 this: t.Optional[exp.Expression] 4260 4261 if self._match(TokenType.DISTINCT): 4262 this = self.expression( 4263 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4264 ) 4265 else: 4266 this = self._parse_select_or_expression(alias=alias) 4267 4268 return self._parse_limit( 4269 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4270 ) 4271 4272 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4273 index = self._index 4274 4275 if not self.errors: 4276 try: 4277 if self._parse_select(nested=True): 4278 return this 4279 except ParseError: 4280 pass 4281 finally: 4282 self.errors.clear() 4283 self._retreat(index) 4284 4285 if not self._match(TokenType.L_PAREN): 4286 return this 4287 4288 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4289 4290 self._match_r_paren() 4291 return self.expression(exp.Schema, this=this, expressions=args) 4292 4293 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4294 return self._parse_column_def(self._parse_field(any_token=True)) 4295 4296 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4297 # column defs are not really columns, they're identifiers 4298 if isinstance(this, exp.Column): 4299 this = this.this 4300 4301 kind = self._parse_types(schema=True) 4302 4303 if self._match_text_seq("FOR", "ORDINALITY"): 4304 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4305 4306 constraints: t.List[exp.Expression] = [] 4307 4308 if not kind and self._match(TokenType.ALIAS): 4309 constraints.append( 4310 self.expression( 4311 exp.ComputedColumnConstraint, 4312 this=self._parse_conjunction(), 4313 persisted=self._match_text_seq("PERSISTED"), 4314 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4315 ) 4316 ) 4317 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4318 self._match(TokenType.ALIAS) 4319 constraints.append( 4320 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4321 ) 4322 4323 while True: 4324 constraint = self._parse_column_constraint() 4325 if not constraint: 4326 break 4327 constraints.append(constraint) 4328 4329 if not kind and not constraints: 4330 return this 4331 4332 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4333 4334 def _parse_auto_increment( 4335 self, 4336 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4337 start = None 4338 increment = None 4339 4340 if self._match(TokenType.L_PAREN, advance=False): 4341 args = self._parse_wrapped_csv(self._parse_bitwise) 4342 start = 
seq_get(args, 0) 4343 increment = seq_get(args, 1) 4344 elif self._match_text_seq("START"): 4345 start = self._parse_bitwise() 4346 self._match_text_seq("INCREMENT") 4347 increment = self._parse_bitwise() 4348 4349 if start and increment: 4350 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4351 4352 return exp.AutoIncrementColumnConstraint() 4353 4354 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4355 if not self._match_text_seq("REFRESH"): 4356 self._retreat(self._index - 1) 4357 return None 4358 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4359 4360 def _parse_compress(self) -> exp.CompressColumnConstraint: 4361 if self._match(TokenType.L_PAREN, advance=False): 4362 return self.expression( 4363 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4364 ) 4365 4366 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4367 4368 def _parse_generated_as_identity( 4369 self, 4370 ) -> ( 4371 exp.GeneratedAsIdentityColumnConstraint 4372 | exp.ComputedColumnConstraint 4373 | exp.GeneratedAsRowColumnConstraint 4374 ): 4375 if self._match_text_seq("BY", "DEFAULT"): 4376 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4377 this = self.expression( 4378 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4379 ) 4380 else: 4381 self._match_text_seq("ALWAYS") 4382 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4383 4384 self._match(TokenType.ALIAS) 4385 4386 if self._match_text_seq("ROW"): 4387 start = self._match_text_seq("START") 4388 if not start: 4389 self._match(TokenType.END) 4390 hidden = self._match_text_seq("HIDDEN") 4391 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4392 4393 identity = self._match_text_seq("IDENTITY") 4394 4395 if self._match(TokenType.L_PAREN): 4396 if self._match(TokenType.START_WITH): 4397 this.set("start", self._parse_bitwise()) 4398 if self._match_text_seq("INCREMENT", "BY"): 4399 this.set("increment", self._parse_bitwise()) 4400 if self._match_text_seq("MINVALUE"): 4401 this.set("minvalue", self._parse_bitwise()) 4402 if self._match_text_seq("MAXVALUE"): 4403 this.set("maxvalue", self._parse_bitwise()) 4404 4405 if self._match_text_seq("CYCLE"): 4406 this.set("cycle", True) 4407 elif self._match_text_seq("NO", "CYCLE"): 4408 this.set("cycle", False) 4409 4410 if not identity: 4411 this.set("expression", self._parse_bitwise()) 4412 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4413 args = self._parse_csv(self._parse_bitwise) 4414 this.set("start", seq_get(args, 0)) 4415 this.set("increment", seq_get(args, 1)) 4416 4417 self._match_r_paren() 4418 4419 return this 4420 4421 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4422 self._match_text_seq("LENGTH") 4423 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4424 4425 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4426 if self._match_text_seq("NULL"): 4427 return self.expression(exp.NotNullColumnConstraint) 4428 if self._match_text_seq("CASESPECIFIC"): 4429 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4430 if self._match_text_seq("FOR", "REPLICATION"): 4431 return self.expression(exp.NotForReplicationColumnConstraint) 4432 return None 4433 4434 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4435 if self._match(TokenType.CONSTRAINT): 4436 
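            # a named constraint, e.g. CONSTRAINT positive_col CHECK (col > 0);
            # capture the name here, then parse the constraint kind below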
this = self._parse_id_var() 4437 else: 4438 this = None 4439 4440 if self._match_texts(self.CONSTRAINT_PARSERS): 4441 return self.expression( 4442 exp.ColumnConstraint, 4443 this=this, 4444 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4445 ) 4446 4447 return this 4448 4449 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4450 if not self._match(TokenType.CONSTRAINT): 4451 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4452 4453 return self.expression( 4454 exp.Constraint, 4455 this=self._parse_id_var(), 4456 expressions=self._parse_unnamed_constraints(), 4457 ) 4458 4459 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4460 constraints = [] 4461 while True: 4462 constraint = self._parse_unnamed_constraint() or self._parse_function() 4463 if not constraint: 4464 break 4465 constraints.append(constraint) 4466 4467 return constraints 4468 4469 def _parse_unnamed_constraint( 4470 self, constraints: t.Optional[t.Collection[str]] = None 4471 ) -> t.Optional[exp.Expression]: 4472 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4473 constraints or self.CONSTRAINT_PARSERS 4474 ): 4475 return None 4476 4477 constraint = self._prev.text.upper() 4478 if constraint not in self.CONSTRAINT_PARSERS: 4479 self.raise_error(f"No parser found for schema constraint {constraint}.") 4480 4481 return self.CONSTRAINT_PARSERS[constraint](self) 4482 4483 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4484 self._match_text_seq("KEY") 4485 return self.expression( 4486 exp.UniqueColumnConstraint, 4487 this=self._parse_schema(self._parse_id_var(any_token=False)), 4488 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4489 on_conflict=self._parse_on_conflict(), 4490 ) 4491 4492 def _parse_key_constraint_options(self) -> t.List[str]: 4493 options = [] 4494 while True: 4495 if not self._curr: 4496 break 4497 4498 if self._match(TokenType.ON): 4499 action = None 4500 on = self._advance_any() and self._prev.text 4501 4502 if self._match_text_seq("NO", "ACTION"): 4503 action = "NO ACTION" 4504 elif self._match_text_seq("CASCADE"): 4505 action = "CASCADE" 4506 elif self._match_text_seq("RESTRICT"): 4507 action = "RESTRICT" 4508 elif self._match_pair(TokenType.SET, TokenType.NULL): 4509 action = "SET NULL" 4510 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4511 action = "SET DEFAULT" 4512 else: 4513 self.raise_error("Invalid key constraint") 4514 4515 options.append(f"ON {on} {action}") 4516 elif self._match_text_seq("NOT", "ENFORCED"): 4517 options.append("NOT ENFORCED") 4518 elif self._match_text_seq("DEFERRABLE"): 4519 options.append("DEFERRABLE") 4520 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4521 options.append("INITIALLY DEFERRED") 4522 elif self._match_text_seq("NORELY"): 4523 options.append("NORELY") 4524 elif self._match_text_seq("MATCH", "FULL"): 4525 options.append("MATCH FULL") 4526 else: 4527 break 4528 4529 return options 4530 4531 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4532 if match and not self._match(TokenType.REFERENCES): 4533 return None 4534 4535 expressions = None 4536 this = self._parse_table(schema=True) 4537 options = self._parse_key_constraint_options() 4538 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4539 4540 def _parse_foreign_key(self) -> exp.ForeignKey: 4541 expressions = self._parse_wrapped_id_vars() 4542 reference = self._parse_references() 
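        # Collect referential actions, e.g. ON DELETE CASCADE / ON UPDATE SET NULL,
        # keyed by "delete" / "update" so they can be passed as ForeignKey args below.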
4543 options = {} 4544 4545 while self._match(TokenType.ON): 4546 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4547 self.raise_error("Expected DELETE or UPDATE") 4548 4549 kind = self._prev.text.lower() 4550 4551 if self._match_text_seq("NO", "ACTION"): 4552 action = "NO ACTION" 4553 elif self._match(TokenType.SET): 4554 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4555 action = "SET " + self._prev.text.upper() 4556 else: 4557 self._advance() 4558 action = self._prev.text.upper() 4559 4560 options[kind] = action 4561 4562 return self.expression( 4563 exp.ForeignKey, 4564 expressions=expressions, 4565 reference=reference, 4566 **options, # type: ignore 4567 ) 4568 4569 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4570 return self._parse_field() 4571 4572 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4573 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4574 self._retreat(self._index - 1) 4575 return None 4576 4577 id_vars = self._parse_wrapped_id_vars() 4578 return self.expression( 4579 exp.PeriodForSystemTimeConstraint, 4580 this=seq_get(id_vars, 0), 4581 expression=seq_get(id_vars, 1), 4582 ) 4583 4584 def _parse_primary_key( 4585 self, wrapped_optional: bool = False, in_props: bool = False 4586 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4587 desc = ( 4588 self._match_set((TokenType.ASC, TokenType.DESC)) 4589 and self._prev.token_type == TokenType.DESC 4590 ) 4591 4592 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4593 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4594 4595 expressions = self._parse_wrapped_csv( 4596 self._parse_primary_key_part, optional=wrapped_optional 4597 ) 4598 options = self._parse_key_constraint_options() 4599 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4600 4601 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4602 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4603 4604 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4605 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4606 return this 4607 4608 bracket_kind = self._prev.token_type 4609 expressions = self._parse_csv( 4610 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4611 ) 4612 4613 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4614 self.raise_error("Expected ]") 4615 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4616 self.raise_error("Expected }") 4617 4618 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4619 if bracket_kind == TokenType.L_BRACE: 4620 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4621 elif not this or this.name.upper() == "ARRAY": 4622 this = self.expression(exp.Array, expressions=expressions) 4623 else: 4624 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4625 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4626 4627 self._add_comments(this) 4628 return self._parse_bracket(this) 4629 4630 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4631 if self._match(TokenType.COLON): 4632 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4633 return this 4634 4635 def _parse_case(self) -> 
t.Optional[exp.Expression]: 4636 ifs = [] 4637 default = None 4638 4639 comments = self._prev_comments 4640 expression = self._parse_conjunction() 4641 4642 while self._match(TokenType.WHEN): 4643 this = self._parse_conjunction() 4644 self._match(TokenType.THEN) 4645 then = self._parse_conjunction() 4646 ifs.append(self.expression(exp.If, this=this, true=then)) 4647 4648 if self._match(TokenType.ELSE): 4649 default = self._parse_conjunction() 4650 4651 if not self._match(TokenType.END): 4652 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4653 default = exp.column("interval") 4654 else: 4655 self.raise_error("Expected END after CASE", self._prev) 4656 4657 return self._parse_window( 4658 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4659 ) 4660 4661 def _parse_if(self) -> t.Optional[exp.Expression]: 4662 if self._match(TokenType.L_PAREN): 4663 args = self._parse_csv(self._parse_conjunction) 4664 this = self.validate_expression(exp.If.from_arg_list(args), args) 4665 self._match_r_paren() 4666 else: 4667 index = self._index - 1 4668 4669 if self.NO_PAREN_IF_COMMANDS and index == 0: 4670 return self._parse_as_command(self._prev) 4671 4672 condition = self._parse_conjunction() 4673 4674 if not condition: 4675 self._retreat(index) 4676 return None 4677 4678 self._match(TokenType.THEN) 4679 true = self._parse_conjunction() 4680 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4681 self._match(TokenType.END) 4682 this = self.expression(exp.If, this=condition, true=true, false=false) 4683 4684 return self._parse_window(this) 4685 4686 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4687 if not self._match_text_seq("VALUE", "FOR"): 4688 self._retreat(self._index - 1) 4689 return None 4690 4691 return self.expression( 4692 exp.NextValueFor, 4693 this=self._parse_column(), 4694 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4695 ) 4696 4697 def _parse_extract(self) -> exp.Extract: 4698 this = self._parse_function() or self._parse_var() or self._parse_type() 4699 4700 if self._match(TokenType.FROM): 4701 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4702 4703 if not self._match(TokenType.COMMA): 4704 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4705 4706 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4707 4708 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4709 this = self._parse_conjunction() 4710 4711 if not self._match(TokenType.ALIAS): 4712 if self._match(TokenType.COMMA): 4713 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4714 4715 self.raise_error("Expected AS after CAST") 4716 4717 fmt = None 4718 to = self._parse_types() 4719 4720 if self._match(TokenType.FORMAT): 4721 fmt_string = self._parse_string() 4722 fmt = self._parse_at_time_zone(fmt_string) 4723 4724 if not to: 4725 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4726 if to.this in exp.DataType.TEMPORAL_TYPES: 4727 this = self.expression( 4728 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4729 this=this, 4730 format=exp.Literal.string( 4731 format_time( 4732 fmt_string.this if fmt_string else "", 4733 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4734 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4735 ) 4736 ), 4737 ) 4738 4739 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, 
exp.StrToTime): 4740 this.set("zone", fmt.args["zone"]) 4741 return this 4742 elif not to: 4743 self.raise_error("Expected TYPE after CAST") 4744 elif isinstance(to, exp.Identifier): 4745 to = exp.DataType.build(to.name, udt=True) 4746 elif to.this == exp.DataType.Type.CHAR: 4747 if self._match(TokenType.CHARACTER_SET): 4748 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4749 4750 return self.expression( 4751 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4752 ) 4753 4754 def _parse_string_agg(self) -> exp.Expression: 4755 if self._match(TokenType.DISTINCT): 4756 args: t.List[t.Optional[exp.Expression]] = [ 4757 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4758 ] 4759 if self._match(TokenType.COMMA): 4760 args.extend(self._parse_csv(self._parse_conjunction)) 4761 else: 4762 args = self._parse_csv(self._parse_conjunction) # type: ignore 4763 4764 index = self._index 4765 if not self._match(TokenType.R_PAREN) and args: 4766 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4767 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4768 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4769 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4770 4771 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4772 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4773 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4774 if not self._match_text_seq("WITHIN", "GROUP"): 4775 self._retreat(index) 4776 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4777 4778 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4779 order = self._parse_order(this=seq_get(args, 0)) 4780 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4781 4782 def _parse_convert( 4783 self, strict: bool, safe: t.Optional[bool] = None 4784 ) -> t.Optional[exp.Expression]: 4785 this = self._parse_bitwise() 4786 4787 if self._match(TokenType.USING): 4788 to: t.Optional[exp.Expression] = self.expression( 4789 exp.CharacterSet, this=self._parse_var() 4790 ) 4791 elif self._match(TokenType.COMMA): 4792 to = self._parse_types() 4793 else: 4794 to = None 4795 4796 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4797 4798 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4799 """ 4800 There are generally two variants of the DECODE function: 4801 4802 - DECODE(bin, charset) 4803 - DECODE(expression, search, result [, search, result] ... [, default]) 4804 4805 The second variant will always be parsed into a CASE expression. Note that NULL 4806 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4807 instead of relying on pattern matching. 
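
        For example (an illustrative sketch), DECODE(x, 1, 'one', 2, 'two', 'other')
        is built as CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END,
        and a NULL search value is matched with an explicit `x IS NULL` condition.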
4808 """ 4809 args = self._parse_csv(self._parse_conjunction) 4810 4811 if len(args) < 3: 4812 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4813 4814 expression, *expressions = args 4815 if not expression: 4816 return None 4817 4818 ifs = [] 4819 for search, result in zip(expressions[::2], expressions[1::2]): 4820 if not search or not result: 4821 return None 4822 4823 if isinstance(search, exp.Literal): 4824 ifs.append( 4825 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4826 ) 4827 elif isinstance(search, exp.Null): 4828 ifs.append( 4829 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4830 ) 4831 else: 4832 cond = exp.or_( 4833 exp.EQ(this=expression.copy(), expression=search), 4834 exp.and_( 4835 exp.Is(this=expression.copy(), expression=exp.Null()), 4836 exp.Is(this=search.copy(), expression=exp.Null()), 4837 copy=False, 4838 ), 4839 copy=False, 4840 ) 4841 ifs.append(exp.If(this=cond, true=result)) 4842 4843 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4844 4845 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4846 self._match_text_seq("KEY") 4847 key = self._parse_column() 4848 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4849 self._match_text_seq("VALUE") 4850 value = self._parse_bitwise() 4851 4852 if not key and not value: 4853 return None 4854 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4855 4856 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4857 if not this or not self._match_text_seq("FORMAT", "JSON"): 4858 return this 4859 4860 return self.expression(exp.FormatJson, this=this) 4861 4862 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4863 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4864 for value in values: 4865 if self._match_text_seq(value, "ON", on): 4866 return f"{value} ON {on}" 4867 4868 return None 4869 4870 @t.overload 4871 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 4872 4873 @t.overload 4874 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
4875 4876 def _parse_json_object(self, agg=False): 4877 star = self._parse_star() 4878 expressions = ( 4879 [star] 4880 if star 4881 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4882 ) 4883 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4884 4885 unique_keys = None 4886 if self._match_text_seq("WITH", "UNIQUE"): 4887 unique_keys = True 4888 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4889 unique_keys = False 4890 4891 self._match_text_seq("KEYS") 4892 4893 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4894 self._parse_type() 4895 ) 4896 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4897 4898 return self.expression( 4899 exp.JSONObjectAgg if agg else exp.JSONObject, 4900 expressions=expressions, 4901 null_handling=null_handling, 4902 unique_keys=unique_keys, 4903 return_type=return_type, 4904 encoding=encoding, 4905 ) 4906 4907 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4908 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4909 if not self._match_text_seq("NESTED"): 4910 this = self._parse_id_var() 4911 kind = self._parse_types(allow_identifiers=False) 4912 nested = None 4913 else: 4914 this = None 4915 kind = None 4916 nested = True 4917 4918 path = self._match_text_seq("PATH") and self._parse_string() 4919 nested_schema = nested and self._parse_json_schema() 4920 4921 return self.expression( 4922 exp.JSONColumnDef, 4923 this=this, 4924 kind=kind, 4925 path=path, 4926 nested_schema=nested_schema, 4927 ) 4928 4929 def _parse_json_schema(self) -> exp.JSONSchema: 4930 self._match_text_seq("COLUMNS") 4931 return self.expression( 4932 exp.JSONSchema, 4933 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4934 ) 4935 4936 def _parse_json_table(self) -> exp.JSONTable: 4937 this = self._parse_format_json(self._parse_bitwise()) 4938 path = self._match(TokenType.COMMA) and self._parse_string() 4939 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4940 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4941 schema = self._parse_json_schema() 4942 4943 return exp.JSONTable( 4944 this=this, 4945 schema=schema, 4946 path=path, 4947 error_handling=error_handling, 4948 empty_handling=empty_handling, 4949 ) 4950 4951 def _parse_match_against(self) -> exp.MatchAgainst: 4952 expressions = self._parse_csv(self._parse_column) 4953 4954 self._match_text_seq(")", "AGAINST", "(") 4955 4956 this = self._parse_string() 4957 4958 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4959 modifier = "IN NATURAL LANGUAGE MODE" 4960 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4961 modifier = f"{modifier} WITH QUERY EXPANSION" 4962 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4963 modifier = "IN BOOLEAN MODE" 4964 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4965 modifier = "WITH QUERY EXPANSION" 4966 else: 4967 modifier = None 4968 4969 return self.expression( 4970 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4971 ) 4972 4973 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4974 def _parse_open_json(self) -> exp.OpenJSON: 4975 this = self._parse_bitwise() 4976 path = self._match(TokenType.COMMA) and self._parse_string() 4977 4978 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4979 this = self._parse_field(any_token=True) 4980 kind = self._parse_types() 4981 path = 
self._parse_string() 4982 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4983 4984 return self.expression( 4985 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4986 ) 4987 4988 expressions = None 4989 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4990 self._match_l_paren() 4991 expressions = self._parse_csv(_parse_open_json_column_def) 4992 4993 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4994 4995 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4996 args = self._parse_csv(self._parse_bitwise) 4997 4998 if self._match(TokenType.IN): 4999 return self.expression( 5000 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5001 ) 5002 5003 if haystack_first: 5004 haystack = seq_get(args, 0) 5005 needle = seq_get(args, 1) 5006 else: 5007 needle = seq_get(args, 0) 5008 haystack = seq_get(args, 1) 5009 5010 return self.expression( 5011 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5012 ) 5013 5014 def _parse_predict(self) -> exp.Predict: 5015 self._match_text_seq("MODEL") 5016 this = self._parse_table() 5017 5018 self._match(TokenType.COMMA) 5019 self._match_text_seq("TABLE") 5020 5021 return self.expression( 5022 exp.Predict, 5023 this=this, 5024 expression=self._parse_table(), 5025 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5026 ) 5027 5028 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5029 args = self._parse_csv(self._parse_table) 5030 return exp.JoinHint(this=func_name.upper(), expressions=args) 5031 5032 def _parse_substring(self) -> exp.Substring: 5033 # Postgres supports the form: substring(string [from int] [for int]) 5034 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5035 5036 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5037 5038 if self._match(TokenType.FROM): 5039 args.append(self._parse_bitwise()) 5040 if self._match(TokenType.FOR): 5041 args.append(self._parse_bitwise()) 5042 5043 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5044 5045 def _parse_trim(self) -> exp.Trim: 5046 # https://www.w3resource.com/sql/character-functions/trim.php 5047 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5048 5049 position = None 5050 collation = None 5051 expression = None 5052 5053 if self._match_texts(self.TRIM_TYPES): 5054 position = self._prev.text.upper() 5055 5056 this = self._parse_bitwise() 5057 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5058 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5059 expression = self._parse_bitwise() 5060 5061 if invert_order: 5062 this, expression = expression, this 5063 5064 if self._match(TokenType.COLLATE): 5065 collation = self._parse_bitwise() 5066 5067 return self.expression( 5068 exp.Trim, this=this, position=position, expression=expression, collation=collation 5069 ) 5070 5071 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5072 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5073 5074 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5075 return self._parse_window(self._parse_id_var(), alias=True) 5076 5077 def _parse_respect_or_ignore_nulls( 5078 self, this: t.Optional[exp.Expression] 5079 ) -> t.Optional[exp.Expression]: 5080 if self._match_text_seq("IGNORE", "NULLS"): 5081 return self.expression(exp.IgnoreNulls, this=this) 5082 if 
self._match_text_seq("RESPECT", "NULLS"): 5083 return self.expression(exp.RespectNulls, this=this) 5084 return this 5085 5086 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5087 if self._match(TokenType.HAVING): 5088 self._match_texts(("MAX", "MIN")) 5089 max = self._prev.text.upper() != "MIN" 5090 return self.expression( 5091 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5092 ) 5093 5094 return this 5095 5096 def _parse_window( 5097 self, this: t.Optional[exp.Expression], alias: bool = False 5098 ) -> t.Optional[exp.Expression]: 5099 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5100 self._match(TokenType.WHERE) 5101 this = self.expression( 5102 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5103 ) 5104 self._match_r_paren() 5105 5106 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5107 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5108 if self._match_text_seq("WITHIN", "GROUP"): 5109 order = self._parse_wrapped(self._parse_order) 5110 this = self.expression(exp.WithinGroup, this=this, expression=order) 5111 5112 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5113 # Some dialects choose to implement and some do not. 5114 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5115 5116 # There is some code above in _parse_lambda that handles 5117 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5118 5119 # The below changes handle 5120 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5121 5122 # Oracle allows both formats 5123 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5124 # and Snowflake chose to do the same for familiarity 5125 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5126 if isinstance(this, exp.AggFunc): 5127 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5128 5129 if ignore_respect and ignore_respect is not this: 5130 ignore_respect.replace(ignore_respect.this) 5131 this = self.expression(ignore_respect.__class__, this=this) 5132 5133 this = self._parse_respect_or_ignore_nulls(this) 5134 5135 # bigquery select from window x AS (partition by ...) 
5136 if alias: 5137 over = None 5138 self._match(TokenType.ALIAS) 5139 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5140 return this 5141 else: 5142 over = self._prev.text.upper() 5143 5144 if not self._match(TokenType.L_PAREN): 5145 return self.expression( 5146 exp.Window, this=this, alias=self._parse_id_var(False), over=over 5147 ) 5148 5149 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5150 5151 first = self._match(TokenType.FIRST) 5152 if self._match_text_seq("LAST"): 5153 first = False 5154 5155 partition, order = self._parse_partition_and_order() 5156 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5157 5158 if kind: 5159 self._match(TokenType.BETWEEN) 5160 start = self._parse_window_spec() 5161 self._match(TokenType.AND) 5162 end = self._parse_window_spec() 5163 5164 spec = self.expression( 5165 exp.WindowSpec, 5166 kind=kind, 5167 start=start["value"], 5168 start_side=start["side"], 5169 end=end["value"], 5170 end_side=end["side"], 5171 ) 5172 else: 5173 spec = None 5174 5175 self._match_r_paren() 5176 5177 window = self.expression( 5178 exp.Window, 5179 this=this, 5180 partition_by=partition, 5181 order=order, 5182 spec=spec, 5183 alias=window_alias, 5184 over=over, 5185 first=first, 5186 ) 5187 5188 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5189 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5190 return self._parse_window(window, alias=alias) 5191 5192 return window 5193 5194 def _parse_partition_and_order( 5195 self, 5196 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5197 return self._parse_partition_by(), self._parse_order() 5198 5199 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5200 self._match(TokenType.BETWEEN) 5201 5202 return { 5203 "value": ( 5204 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5205 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5206 or self._parse_bitwise() 5207 ), 5208 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5209 } 5210 5211 def _parse_alias( 5212 self, this: t.Optional[exp.Expression], explicit: bool = False 5213 ) -> t.Optional[exp.Expression]: 5214 any_token = self._match(TokenType.ALIAS) 5215 comments = self._prev_comments 5216 5217 if explicit and not any_token: 5218 return this 5219 5220 if self._match(TokenType.L_PAREN): 5221 aliases = self.expression( 5222 exp.Aliases, 5223 comments=comments, 5224 this=this, 5225 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5226 ) 5227 self._match_r_paren(aliases) 5228 return aliases 5229 5230 alias = self._parse_id_var(any_token) or ( 5231 self.STRING_ALIASES and self._parse_string_as_identifier() 5232 ) 5233 5234 if alias: 5235 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5236 column = this.this 5237 5238 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5239 if not this.comments and column and column.comments: 5240 this.comments = column.comments 5241 column.comments = None 5242 5243 return this 5244 5245 def _parse_id_var( 5246 self, 5247 any_token: bool = True, 5248 tokens: t.Optional[t.Collection[TokenType]] = None, 5249 ) -> t.Optional[exp.Expression]: 5250 identifier = self._parse_identifier() 5251 5252 if identifier: 5253 return identifier 5254 5255 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5256 quoted = self._prev.token_type == TokenType.STRING 5257 
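            # a string token consumed as an identifier keeps its quoted flag set,
            # so it round-trips with quotes when generated back to SQL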
    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.comments
                column.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        self._match(TokenType.L_BRACE)
        this = self._parse_identifier() or self._parse_primary_or_var()
        expression = self._match(TokenType.COLON) and (
            self._parse_identifier() or self._parse_primary_or_var()
        )
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None
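    # Editor's note -- illustrative example, not part of the original source:
    # _parse_placeholder is the shared fallback of the small parsers above, so
    # bind parameters stay usable wherever a literal or identifier is
    # expected; e.g. via the COLON entry in PLACEHOLDER_PARSERS, "SELECT
    # :limit" yields exp.Placeholder(this="limit").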
    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
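    # Editor's note -- illustrative example, not part of the original source:
    # _parse_csv and _parse_tokens are the combinators most methods lean on.
    # _parse_tokens builds left-associative binary trees, so "a AND b AND c"
    # becomes exp.And(this=exp.And(this=a, expression=b), expression=c), while
    # _parse_csv collects comma-separated items into a plain Python list.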
    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )
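    # Editor's note -- illustrative example, not part of the original source:
    # "ALTER TABLE t ALTER COLUMN c SET DATA TYPE BIGINT" is routed through
    # _parse_alter_table_alter above and yields an exp.AlterColumn action
    # (this=c, dtype=BIGINT) inside the exp.AlterTable built by _parse_alter.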
    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens
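    # Editor's note -- illustrative example, not part of the original source:
    # a statement such as
    #
    #     MERGE INTO t USING s ON t.id = s.id
    #     WHEN MATCHED THEN UPDATE SET t.v = s.v
    #     WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)
    #
    # produces an exp.Merge whose "expressions" are the exp.When nodes built
    # by _parse_when_matched above.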
    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )
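    # Editor's note -- illustrative example, not part of the original source:
    # these dictionary helpers cover ClickHouse-style dictionary DDL; e.g. the
    # property "LIFETIME(MIN 0 MAX 1000)" is handled by _parse_dict_range
    # below, producing exp.DictRange(this="LIFETIME", min=0, max=1000).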
self._match_text_seq("MIN") 5778 if has_min: 5779 min = self._parse_var() or self._parse_primary() 5780 self._match_text_seq("MAX") 5781 max = self._parse_var() or self._parse_primary() 5782 else: 5783 max = self._parse_var() or self._parse_primary() 5784 min = exp.Literal.number(0) 5785 self._match_r_paren() 5786 return self.expression(exp.DictRange, this=this, min=min, max=max) 5787 5788 def _parse_comprehension( 5789 self, this: t.Optional[exp.Expression] 5790 ) -> t.Optional[exp.Comprehension]: 5791 index = self._index 5792 expression = self._parse_column() 5793 if not self._match(TokenType.IN): 5794 self._retreat(index - 1) 5795 return None 5796 iterator = self._parse_column() 5797 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5798 return self.expression( 5799 exp.Comprehension, 5800 this=this, 5801 expression=expression, 5802 iterator=iterator, 5803 condition=condition, 5804 ) 5805 5806 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5807 if self._match(TokenType.HEREDOC_STRING): 5808 return self.expression(exp.Heredoc, this=self._prev.text) 5809 5810 if not self._match_text_seq("$"): 5811 return None 5812 5813 tags = ["$"] 5814 tag_text = None 5815 5816 if self._is_connected(): 5817 self._advance() 5818 tags.append(self._prev.text.upper()) 5819 else: 5820 self.raise_error("No closing $ found") 5821 5822 if tags[-1] != "$": 5823 if self._is_connected() and self._match_text_seq("$"): 5824 tag_text = tags[-1] 5825 tags.append("$") 5826 else: 5827 self.raise_error("No closing $ found") 5828 5829 heredoc_start = self._curr 5830 5831 while self._curr: 5832 if self._match_text_seq(*tags, advance=False): 5833 this = self._find_sql(heredoc_start, self._prev) 5834 self._advance(len(tags)) 5835 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5836 5837 self._advance() 5838 5839 self.raise_error(f"No closing {''.join(tags)} found") 5840 return None 5841 5842 def _find_parser( 5843 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5844 ) -> t.Optional[t.Callable]: 5845 if not self._curr: 5846 return None 5847 5848 index = self._index 5849 this = [] 5850 while True: 5851 # The current token might be multiple words 5852 curr = self._curr.text.upper() 5853 key = curr.split(" ") 5854 this.append(curr) 5855 5856 self._advance() 5857 result, trie = in_trie(trie, key) 5858 if result == TrieResult.FAILED: 5859 break 5860 5861 if result == TrieResult.EXISTS: 5862 subparser = parsers[" ".join(this)] 5863 return subparser 5864 5865 self._retreat(index) 5866 return None 5867 5868 def _match(self, token_type, advance=True, expression=None): 5869 if not self._curr: 5870 return None 5871 5872 if self._curr.token_type == token_type: 5873 if advance: 5874 self._advance() 5875 self._add_comments(expression) 5876 return True 5877 5878 return None 5879 5880 def _match_set(self, types, advance=True): 5881 if not self._curr: 5882 return None 5883 5884 if self._curr.token_type in types: 5885 if advance: 5886 self._advance() 5887 return True 5888 5889 return None 5890 5891 def _match_pair(self, token_type_a, token_type_b, advance=True): 5892 if not self._curr or not self._next: 5893 return None 5894 5895 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5896 if advance: 5897 self._advance(2) 5898 return True 5899 5900 return None 5901 5902 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5903 if not self._match(TokenType.L_PAREN, expression=expression): 5904 self.raise_error("Expecting (") 5905 5906 def 
    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]: ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )
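# ---------------------------------------------------------------------------
# Editor's usage sketch (illustrative, not part of the original source): the
# Parser is normally fed tokens produced by the Tokenizer, either directly or
# through the high-level sqlglot API:
#
#     from sqlglot.parser import Parser
#     from sqlglot.tokens import Tokenizer
#
#     sql = "SELECT a, SUM(b) FROM t GROUP BY a"
#     (ast,) = Parser().parse(Tokenizer().tokenize(sql), sql)
#
# which, for the default dialect, should match sqlglot.parse_one(sql).
# ---------------------------------------------------------------------------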
Line {token.line}, Col: {token.col}.\n" 1200 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1201 description=message, 1202 line=token.line, 1203 col=token.col, 1204 start_context=start_context, 1205 highlight=highlight, 1206 end_context=end_context, 1207 ) 1208 1209 if self.error_level == ErrorLevel.IMMEDIATE: 1210 raise error 1211 1212 self.errors.append(error) 1213 1214 def expression( 1215 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1216 ) -> E: 1217 """ 1218 Creates a new, validated Expression. 1219 1220 Args: 1221 exp_class: The expression class to instantiate. 1222 comments: An optional list of comments to attach to the expression. 1223 kwargs: The arguments to set for the expression along with their respective values. 1224 1225 Returns: 1226 The target expression. 1227 """ 1228 instance = exp_class(**kwargs) 1229 instance.add_comments(comments) if comments else self._add_comments(instance) 1230 return self.validate_expression(instance) 1231 1232 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1233 if expression and self._prev_comments: 1234 expression.add_comments(self._prev_comments) 1235 self._prev_comments = None 1236 1237 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1238 """ 1239 Validates an Expression, making sure that all its mandatory arguments are set. 1240 1241 Args: 1242 expression: The expression to validate. 1243 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1244 1245 Returns: 1246 The validated expression. 1247 """ 1248 if self.error_level != ErrorLevel.IGNORE: 1249 for error_message in expression.error_messages(args): 1250 self.raise_error(error_message) 1251 1252 return expression 1253 1254 def _find_sql(self, start: Token, end: Token) -> str: 1255 return self.sql[start.start : end.end + 1] 1256 1257 def _is_connected(self) -> bool: 1258 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1259 1260 def _advance(self, times: int = 1) -> None: 1261 self._index += times 1262 self._curr = seq_get(self._tokens, self._index) 1263 self._next = seq_get(self._tokens, self._index + 1) 1264 1265 if self._index > 0: 1266 self._prev = self._tokens[self._index - 1] 1267 self._prev_comments = self._prev.comments 1268 else: 1269 self._prev = None 1270 self._prev_comments = None 1271 1272 def _retreat(self, index: int) -> None: 1273 if index != self._index: 1274 self._advance(index - self._index) 1275 1276 def _warn_unsupported(self) -> None: 1277 if len(self._tokens) <= 1: 1278 return 1279 1280 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1281 # interested in emitting a warning for the one being currently processed. 1282 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1283 1284 logger.warning( 1285 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 
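# --- Editor's aside (illustrative sketch, not part of sqlglot.parser) -------------
# parse() and parse_into() are normally reached through the dialect layer, but the
# Parser can be driven directly: tokenize first, then hand the tokens over.
# error_level decides whether a problem raises immediately (IMMEDIATE, the default),
# is collected and raised in bulk (RAISE), logged (WARN), or ignored (IGNORE).
# Assumes a recent sqlglot release; the asserts show expected shapes:

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
parser = Parser(error_level=ErrorLevel.RAISE)
statements = parser.parse(Tokenizer().tokenize(sql), sql)
assert len(statements) == 2 and all(isinstance(s, exp.Select) for s in statements)

# parse_into() tries the given expression type(s) in order:
table = parser.parse_into(exp.Table, Tokenizer().tokenize("db.tbl"), "db.tbl")[0]
assert isinstance(table, exp.Table)

# Statements the parser cannot model fall back to exp.Command, with the warning
# emitted by _warn_unsupported above:
show = parser.parse(Tokenizer().tokenize("SHOW TABLES"), "SHOW TABLES")[0]
assert isinstance(show, exp.Command)
# --- end aside ---------------------------------------------------------------------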
1286 ) 1287 1288 def _parse_command(self) -> exp.Command: 1289 self._warn_unsupported() 1290 return self.expression( 1291 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1292 ) 1293 1294 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1295 start = self._prev 1296 exists = self._parse_exists() if allow_exists else None 1297 1298 self._match(TokenType.ON) 1299 1300 kind = self._match_set(self.CREATABLES) and self._prev 1301 if not kind: 1302 return self._parse_as_command(start) 1303 1304 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1305 this = self._parse_user_defined_function(kind=kind.token_type) 1306 elif kind.token_type == TokenType.TABLE: 1307 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1308 elif kind.token_type == TokenType.COLUMN: 1309 this = self._parse_column() 1310 else: 1311 this = self._parse_id_var() 1312 1313 self._match(TokenType.IS) 1314 1315 return self.expression( 1316 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1317 ) 1318 1319 def _parse_to_table( 1320 self, 1321 ) -> exp.ToTableProperty: 1322 table = self._parse_table_parts(schema=True) 1323 return self.expression(exp.ToTableProperty, this=table) 1324 1325 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1326 def _parse_ttl(self) -> exp.Expression: 1327 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1328 this = self._parse_bitwise() 1329 1330 if self._match_text_seq("DELETE"): 1331 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1332 if self._match_text_seq("RECOMPRESS"): 1333 return self.expression( 1334 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1335 ) 1336 if self._match_text_seq("TO", "DISK"): 1337 return self.expression( 1338 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1339 ) 1340 if self._match_text_seq("TO", "VOLUME"): 1341 return self.expression( 1342 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1343 ) 1344 1345 return this 1346 1347 expressions = self._parse_csv(_parse_ttl_action) 1348 where = self._parse_where() 1349 group = self._parse_group() 1350 1351 aggregates = None 1352 if group and self._match(TokenType.SET): 1353 aggregates = self._parse_csv(self._parse_set_item) 1354 1355 return self.expression( 1356 exp.MergeTreeTTL, 1357 expressions=expressions, 1358 where=where, 1359 group=group, 1360 aggregates=aggregates, 1361 ) 1362 1363 def _parse_statement(self) -> t.Optional[exp.Expression]: 1364 if self._curr is None: 1365 return None 1366 1367 if self._match_set(self.STATEMENT_PARSERS): 1368 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1369 1370 if self._match_set(Tokenizer.COMMANDS): 1371 return self._parse_command() 1372 1373 expression = self._parse_expression() 1374 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1375 return self._parse_query_modifiers(expression) 1376 1377 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1378 start = self._prev 1379 temporary = self._match(TokenType.TEMPORARY) 1380 materialized = self._match_text_seq("MATERIALIZED") 1381 1382 kind = self._match_set(self.CREATABLES) and self._prev.text 1383 if not kind: 1384 return self._parse_as_command(start) 1385 1386 return self.expression( 1387 exp.Drop, 1388 comments=start.comments, 1389 exists=exists or self._parse_exists(), 1390 this=self._parse_table( 1391 
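# --- Editor's aside (illustrative sketch, not part of sqlglot.parser) -------------
# _parse_drop maps each consumed keyword onto a flag of the exp.Drop node; anything
# it cannot model is re-parsed as a plain exp.Command. Assumes a recent sqlglot
# release; the asserts show expected shapes:

import sqlglot
from sqlglot import exp

drop = sqlglot.parse_one("DROP TABLE IF EXISTS db.t CASCADE")
assert isinstance(drop, exp.Drop)
assert drop.args["kind"] == "TABLE" and drop.args["exists"] and drop.args["cascade"]
# --- end aside ---------------------------------------------------------------------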
schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1392 ), 1393 kind=kind, 1394 temporary=temporary, 1395 materialized=materialized, 1396 cascade=self._match_text_seq("CASCADE"), 1397 constraints=self._match_text_seq("CONSTRAINTS"), 1398 purge=self._match_text_seq("PURGE"), 1399 ) 1400 1401 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1402 return ( 1403 self._match_text_seq("IF") 1404 and (not not_ or self._match(TokenType.NOT)) 1405 and self._match(TokenType.EXISTS) 1406 ) 1407 1408 def _parse_create(self) -> exp.Create | exp.Command: 1409 # Note: this can't be None because we've matched a statement parser 1410 start = self._prev 1411 comments = self._prev_comments 1412 1413 replace = ( 1414 start.token_type == TokenType.REPLACE 1415 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1416 or self._match_pair(TokenType.OR, TokenType.ALTER) 1417 ) 1418 unique = self._match(TokenType.UNIQUE) 1419 1420 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1421 self._advance() 1422 1423 properties = None 1424 create_token = self._match_set(self.CREATABLES) and self._prev 1425 1426 if not create_token: 1427 # exp.Properties.Location.POST_CREATE 1428 properties = self._parse_properties() 1429 create_token = self._match_set(self.CREATABLES) and self._prev 1430 1431 if not properties or not create_token: 1432 return self._parse_as_command(start) 1433 1434 exists = self._parse_exists(not_=True) 1435 this = None 1436 expression: t.Optional[exp.Expression] = None 1437 indexes = None 1438 no_schema_binding = None 1439 begin = None 1440 end = None 1441 clone = None 1442 1443 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1444 nonlocal properties 1445 if properties and temp_props: 1446 properties.expressions.extend(temp_props.expressions) 1447 elif temp_props: 1448 properties = temp_props 1449 1450 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1451 this = self._parse_user_defined_function(kind=create_token.token_type) 1452 1453 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1454 extend_props(self._parse_properties()) 1455 1456 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1457 1458 if not expression: 1459 if self._match(TokenType.COMMAND): 1460 expression = self._parse_as_command(self._prev) 1461 else: 1462 begin = self._match(TokenType.BEGIN) 1463 return_ = self._match_text_seq("RETURN") 1464 1465 if self._match(TokenType.STRING, advance=False): 1466 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1467 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1468 expression = self._parse_string() 1469 extend_props(self._parse_properties()) 1470 else: 1471 expression = self._parse_statement() 1472 1473 end = self._match_text_seq("END") 1474 1475 if return_: 1476 expression = self.expression(exp.Return, this=expression) 1477 elif create_token.token_type == TokenType.INDEX: 1478 this = self._parse_index(index=self._parse_id_var()) 1479 elif create_token.token_type in self.DB_CREATABLES: 1480 table_parts = self._parse_table_parts( 1481 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1482 ) 1483 1484 # exp.Properties.Location.POST_NAME 1485 self._match(TokenType.COMMA) 1486 extend_props(self._parse_properties(before=True)) 1487 1488 this = self._parse_schema(this=table_parts) 1489 1490 # exp.Properties.Location.POST_SCHEMA 
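# --- Editor's aside (illustrative sketch, not part of sqlglot.parser) -------------
# _parse_create threads a single exp.Create node through many optional pieces: the
# OR REPLACE / UNIQUE prefixes, the creatable kind, IF NOT EXISTS, properties
# gathered at several locations via extend_props, and an optional DDL SELECT body.
# Assumes a recent sqlglot release; the asserts show expected shapes:

import sqlglot
from sqlglot import exp

create = sqlglot.parse_one("CREATE OR REPLACE TABLE t AS SELECT 1 AS x")
assert isinstance(create, exp.Create)
assert create.args["kind"] == "TABLE" and create.args["replace"]
assert isinstance(create.expression, exp.Select)  # the AS SELECT ... body
# --- end aside ---------------------------------------------------------------------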
and POST_WITH 1491 extend_props(self._parse_properties()) 1492 1493 self._match(TokenType.ALIAS) 1494 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1495 # exp.Properties.Location.POST_ALIAS 1496 extend_props(self._parse_properties()) 1497 1498 expression = self._parse_ddl_select() 1499 1500 if create_token.token_type == TokenType.TABLE: 1501 # exp.Properties.Location.POST_EXPRESSION 1502 extend_props(self._parse_properties()) 1503 1504 indexes = [] 1505 while True: 1506 index = self._parse_index() 1507 1508 # exp.Properties.Location.POST_INDEX 1509 extend_props(self._parse_properties()) 1510 1511 if not index: 1512 break 1513 else: 1514 self._match(TokenType.COMMA) 1515 indexes.append(index) 1516 elif create_token.token_type == TokenType.VIEW: 1517 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1518 no_schema_binding = True 1519 1520 shallow = self._match_text_seq("SHALLOW") 1521 1522 if self._match_texts(self.CLONE_KEYWORDS): 1523 copy = self._prev.text.lower() == "copy" 1524 clone = self.expression( 1525 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1526 ) 1527 1528 if self._curr: 1529 return self._parse_as_command(start) 1530 1531 return self.expression( 1532 exp.Create, 1533 comments=comments, 1534 this=this, 1535 kind=create_token.text.upper(), 1536 replace=replace, 1537 unique=unique, 1538 expression=expression, 1539 exists=exists, 1540 properties=properties, 1541 indexes=indexes, 1542 no_schema_binding=no_schema_binding, 1543 begin=begin, 1544 end=end, 1545 clone=clone, 1546 ) 1547 1548 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1549 # only used for teradata currently 1550 self._match(TokenType.COMMA) 1551 1552 kwargs = { 1553 "no": self._match_text_seq("NO"), 1554 "dual": self._match_text_seq("DUAL"), 1555 "before": self._match_text_seq("BEFORE"), 1556 "default": self._match_text_seq("DEFAULT"), 1557 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1558 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1559 "after": self._match_text_seq("AFTER"), 1560 "minimum": self._match_texts(("MIN", "MINIMUM")), 1561 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1562 } 1563 1564 if self._match_texts(self.PROPERTY_PARSERS): 1565 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1566 try: 1567 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1568 except TypeError: 1569 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1570 1571 return None 1572 1573 def _parse_property(self) -> t.Optional[exp.Expression]: 1574 if self._match_texts(self.PROPERTY_PARSERS): 1575 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1576 1577 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1578 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1579 1580 if self._match_text_seq("COMPOUND", "SORTKEY"): 1581 return self._parse_sortkey(compound=True) 1582 1583 if self._match_text_seq("SQL", "SECURITY"): 1584 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1585 1586 index = self._index 1587 key = self._parse_column() 1588 1589 if not self._match(TokenType.EQ): 1590 self._retreat(index) 1591 return None 1592 1593 return self.expression( 1594 exp.Property, 1595 this=key.to_dot() if isinstance(key, exp.Column) else key, 1596 value=self._parse_column() or self._parse_var(any_token=True), 1597 ) 1598 1599 def _parse_stored(self) -> exp.FileFormatProperty: 1600 self._match(TokenType.ALIAS) 1601 1602 
input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1603 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1604 1605 return self.expression( 1606 exp.FileFormatProperty, 1607 this=( 1608 self.expression( 1609 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1610 ) 1611 if input_format or output_format 1612 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1613 ), 1614 ) 1615 1616 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1617 self._match(TokenType.EQ) 1618 self._match(TokenType.ALIAS) 1619 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1620 1621 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1622 properties = [] 1623 while True: 1624 if before: 1625 prop = self._parse_property_before() 1626 else: 1627 prop = self._parse_property() 1628 1629 if not prop: 1630 break 1631 for p in ensure_list(prop): 1632 properties.append(p) 1633 1634 if properties: 1635 return self.expression(exp.Properties, expressions=properties) 1636 1637 return None 1638 1639 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1640 return self.expression( 1641 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1642 ) 1643 1644 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1645 if self._index >= 2: 1646 pre_volatile_token = self._tokens[self._index - 2] 1647 else: 1648 pre_volatile_token = None 1649 1650 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1651 return exp.VolatileProperty() 1652 1653 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1654 1655 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1656 self._match_pair(TokenType.EQ, TokenType.ON) 1657 1658 prop = self.expression(exp.WithSystemVersioningProperty) 1659 if self._match(TokenType.L_PAREN): 1660 self._match_text_seq("HISTORY_TABLE", "=") 1661 prop.set("this", self._parse_table_parts()) 1662 1663 if self._match(TokenType.COMMA): 1664 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1665 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1666 1667 self._match_r_paren() 1668 1669 return prop 1670 1671 def _parse_with_property( 1672 self, 1673 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1674 if self._match(TokenType.L_PAREN, advance=False): 1675 return self._parse_wrapped_csv(self._parse_property) 1676 1677 if self._match_text_seq("JOURNAL"): 1678 return self._parse_withjournaltable() 1679 1680 if self._match_text_seq("DATA"): 1681 return self._parse_withdata(no=False) 1682 elif self._match_text_seq("NO", "DATA"): 1683 return self._parse_withdata(no=True) 1684 1685 if not self._next: 1686 return None 1687 1688 return self._parse_withisolatedloading() 1689 1690 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1691 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1692 self._match(TokenType.EQ) 1693 1694 user = self._parse_id_var() 1695 self._match(TokenType.PARAMETER) 1696 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1697 1698 if not user or not host: 1699 return None 1700 1701 return exp.DefinerProperty(this=f"{user}@{host}") 1702 1703 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1704 self._match(TokenType.TABLE) 1705 
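# --- Editor's aside (illustrative sketch, not part of sqlglot.parser) -------------
# Each PROPERTY_PARSERS helper (like the Teradata-flavored ones around here) consumes
# the tokens that follow its trigger keyword and returns one property node;
# _parse_properties keeps invoking them and wraps the results in a single
# exp.Properties. A sketch using Hive's PARTITIONED BY; assumes a recent sqlglot
# release:

import sqlglot
from sqlglot import exp

create = sqlglot.parse_one("CREATE TABLE t (x INT) PARTITIONED BY (y STRING)", read="hive")
assert create.find(exp.PartitionedByProperty) is not None
# --- end aside ---------------------------------------------------------------------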
self._match(TokenType.EQ) 1706 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1707 1708 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1709 return self.expression(exp.LogProperty, no=no) 1710 1711 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1712 return self.expression(exp.JournalProperty, **kwargs) 1713 1714 def _parse_checksum(self) -> exp.ChecksumProperty: 1715 self._match(TokenType.EQ) 1716 1717 on = None 1718 if self._match(TokenType.ON): 1719 on = True 1720 elif self._match_text_seq("OFF"): 1721 on = False 1722 1723 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1724 1725 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1726 return self.expression( 1727 exp.Cluster, 1728 expressions=( 1729 self._parse_wrapped_csv(self._parse_ordered) 1730 if wrapped 1731 else self._parse_csv(self._parse_ordered) 1732 ), 1733 ) 1734 1735 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1736 self._match_text_seq("BY") 1737 1738 self._match_l_paren() 1739 expressions = self._parse_csv(self._parse_column) 1740 self._match_r_paren() 1741 1742 if self._match_text_seq("SORTED", "BY"): 1743 self._match_l_paren() 1744 sorted_by = self._parse_csv(self._parse_ordered) 1745 self._match_r_paren() 1746 else: 1747 sorted_by = None 1748 1749 self._match(TokenType.INTO) 1750 buckets = self._parse_number() 1751 self._match_text_seq("BUCKETS") 1752 1753 return self.expression( 1754 exp.ClusteredByProperty, 1755 expressions=expressions, 1756 sorted_by=sorted_by, 1757 buckets=buckets, 1758 ) 1759 1760 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1761 if not self._match_text_seq("GRANTS"): 1762 self._retreat(self._index - 1) 1763 return None 1764 1765 return self.expression(exp.CopyGrantsProperty) 1766 1767 def _parse_freespace(self) -> exp.FreespaceProperty: 1768 self._match(TokenType.EQ) 1769 return self.expression( 1770 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1771 ) 1772 1773 def _parse_mergeblockratio( 1774 self, no: bool = False, default: bool = False 1775 ) -> exp.MergeBlockRatioProperty: 1776 if self._match(TokenType.EQ): 1777 return self.expression( 1778 exp.MergeBlockRatioProperty, 1779 this=self._parse_number(), 1780 percent=self._match(TokenType.PERCENT), 1781 ) 1782 1783 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1784 1785 def _parse_datablocksize( 1786 self, 1787 default: t.Optional[bool] = None, 1788 minimum: t.Optional[bool] = None, 1789 maximum: t.Optional[bool] = None, 1790 ) -> exp.DataBlocksizeProperty: 1791 self._match(TokenType.EQ) 1792 size = self._parse_number() 1793 1794 units = None 1795 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1796 units = self._prev.text 1797 1798 return self.expression( 1799 exp.DataBlocksizeProperty, 1800 size=size, 1801 units=units, 1802 default=default, 1803 minimum=minimum, 1804 maximum=maximum, 1805 ) 1806 1807 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1808 self._match(TokenType.EQ) 1809 always = self._match_text_seq("ALWAYS") 1810 manual = self._match_text_seq("MANUAL") 1811 never = self._match_text_seq("NEVER") 1812 default = self._match_text_seq("DEFAULT") 1813 1814 autotemp = None 1815 if self._match_text_seq("AUTOTEMP"): 1816 autotemp = self._parse_schema() 1817 1818 return self.expression( 1819 exp.BlockCompressionProperty, 1820 always=always, 1821 manual=manual, 1822 never=never, 1823 
default=default, 1824 autotemp=autotemp, 1825 ) 1826 1827 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1828 no = self._match_text_seq("NO") 1829 concurrent = self._match_text_seq("CONCURRENT") 1830 self._match_text_seq("ISOLATED", "LOADING") 1831 for_all = self._match_text_seq("FOR", "ALL") 1832 for_insert = self._match_text_seq("FOR", "INSERT") 1833 for_none = self._match_text_seq("FOR", "NONE") 1834 return self.expression( 1835 exp.IsolatedLoadingProperty, 1836 no=no, 1837 concurrent=concurrent, 1838 for_all=for_all, 1839 for_insert=for_insert, 1840 for_none=for_none, 1841 ) 1842 1843 def _parse_locking(self) -> exp.LockingProperty: 1844 if self._match(TokenType.TABLE): 1845 kind = "TABLE" 1846 elif self._match(TokenType.VIEW): 1847 kind = "VIEW" 1848 elif self._match(TokenType.ROW): 1849 kind = "ROW" 1850 elif self._match_text_seq("DATABASE"): 1851 kind = "DATABASE" 1852 else: 1853 kind = None 1854 1855 if kind in ("DATABASE", "TABLE", "VIEW"): 1856 this = self._parse_table_parts() 1857 else: 1858 this = None 1859 1860 if self._match(TokenType.FOR): 1861 for_or_in = "FOR" 1862 elif self._match(TokenType.IN): 1863 for_or_in = "IN" 1864 else: 1865 for_or_in = None 1866 1867 if self._match_text_seq("ACCESS"): 1868 lock_type = "ACCESS" 1869 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1870 lock_type = "EXCLUSIVE" 1871 elif self._match_text_seq("SHARE"): 1872 lock_type = "SHARE" 1873 elif self._match_text_seq("READ"): 1874 lock_type = "READ" 1875 elif self._match_text_seq("WRITE"): 1876 lock_type = "WRITE" 1877 elif self._match_text_seq("CHECKSUM"): 1878 lock_type = "CHECKSUM" 1879 else: 1880 lock_type = None 1881 1882 override = self._match_text_seq("OVERRIDE") 1883 1884 return self.expression( 1885 exp.LockingProperty, 1886 this=this, 1887 kind=kind, 1888 for_or_in=for_or_in, 1889 lock_type=lock_type, 1890 override=override, 1891 ) 1892 1893 def _parse_partition_by(self) -> t.List[exp.Expression]: 1894 if self._match(TokenType.PARTITION_BY): 1895 return self._parse_csv(self._parse_conjunction) 1896 return [] 1897 1898 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1899 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1900 if self._match_text_seq("MINVALUE"): 1901 return exp.var("MINVALUE") 1902 if self._match_text_seq("MAXVALUE"): 1903 return exp.var("MAXVALUE") 1904 return self._parse_bitwise() 1905 1906 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1907 expression = None 1908 from_expressions = None 1909 to_expressions = None 1910 1911 if self._match(TokenType.IN): 1912 this = self._parse_wrapped_csv(self._parse_bitwise) 1913 elif self._match(TokenType.FROM): 1914 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1915 self._match_text_seq("TO") 1916 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1917 elif self._match_text_seq("WITH", "(", "MODULUS"): 1918 this = self._parse_number() 1919 self._match_text_seq(",", "REMAINDER") 1920 expression = self._parse_number() 1921 self._match_r_paren() 1922 else: 1923 self.raise_error("Failed to parse partition bound spec.") 1924 1925 return self.expression( 1926 exp.PartitionBoundSpec, 1927 this=this, 1928 expression=expression, 1929 from_expressions=from_expressions, 1930 to_expressions=to_expressions, 1931 ) 1932 1933 # https://www.postgresql.org/docs/current/sql-createtable.html 1934 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1935 if not self._match_text_seq("OF"): 1936 
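# --- Editor's aside (illustrative sketch, not part of sqlglot.parser) -------------
# _parse_partition_bound_spec covers the three Postgres bound forms: IN (...),
# FROM (...) TO (...) with MINVALUE/MAXVALUE markers, and WITH (MODULUS n,
# REMAINDER m). A sketch; assumes a recent sqlglot release with Postgres
# PARTITION OF support, and the assert shows the expected shape:

import sqlglot
from sqlglot import exp

ddl = "CREATE TABLE p PARTITION OF t FOR VALUES FROM (MINVALUE) TO (10)"
spec = sqlglot.parse_one(ddl, read="postgres").find(exp.PartitionBoundSpec)
assert spec is not None and spec.args.get("from_expressions")
# --- end aside ---------------------------------------------------------------------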
self._retreat(self._index - 1) 1937 return None 1938 1939 this = self._parse_table(schema=True) 1940 1941 if self._match(TokenType.DEFAULT): 1942 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1943 elif self._match_text_seq("FOR", "VALUES"): 1944 expression = self._parse_partition_bound_spec() 1945 else: 1946 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1947 1948 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1949 1950 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1951 self._match(TokenType.EQ) 1952 return self.expression( 1953 exp.PartitionedByProperty, 1954 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1955 ) 1956 1957 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1958 if self._match_text_seq("AND", "STATISTICS"): 1959 statistics = True 1960 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1961 statistics = False 1962 else: 1963 statistics = None 1964 1965 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1966 1967 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1968 if self._match_text_seq("SQL"): 1969 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 1970 return None 1971 1972 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1973 if self._match_text_seq("SQL", "DATA"): 1974 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 1975 return None 1976 1977 def _parse_no_property(self) -> t.Optional[exp.Expression]: 1978 if self._match_text_seq("PRIMARY", "INDEX"): 1979 return exp.NoPrimaryIndexProperty() 1980 if self._match_text_seq("SQL"): 1981 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 1982 return None 1983 1984 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1985 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1986 return exp.OnCommitProperty() 1987 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1988 return exp.OnCommitProperty(delete=True) 1989 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1990 1991 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1992 if self._match_text_seq("SQL", "DATA"): 1993 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 1994 return None 1995 1996 def _parse_distkey(self) -> exp.DistKeyProperty: 1997 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1998 1999 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2000 table = self._parse_table(schema=True) 2001 2002 options = [] 2003 while self._match_texts(("INCLUDING", "EXCLUDING")): 2004 this = self._prev.text.upper() 2005 2006 id_var = self._parse_id_var() 2007 if not id_var: 2008 return None 2009 2010 options.append( 2011 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2012 ) 2013 2014 return self.expression(exp.LikeProperty, this=table, expressions=options) 2015 2016 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2017 return self.expression( 2018 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2019 ) 2020 2021 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2022 self._match(TokenType.EQ) 2023 return self.expression( 2024 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2025 ) 2026 2027 def _parse_remote_with_connection(self) 
-> exp.RemoteWithConnectionModelProperty: 2028 self._match_text_seq("WITH", "CONNECTION") 2029 return self.expression( 2030 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2031 ) 2032 2033 def _parse_returns(self) -> exp.ReturnsProperty: 2034 value: t.Optional[exp.Expression] 2035 is_table = self._match(TokenType.TABLE) 2036 2037 if is_table: 2038 if self._match(TokenType.LT): 2039 value = self.expression( 2040 exp.Schema, 2041 this="TABLE", 2042 expressions=self._parse_csv(self._parse_struct_types), 2043 ) 2044 if not self._match(TokenType.GT): 2045 self.raise_error("Expecting >") 2046 else: 2047 value = self._parse_schema(exp.var("TABLE")) 2048 else: 2049 value = self._parse_types() 2050 2051 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2052 2053 def _parse_describe(self) -> exp.Describe: 2054 kind = self._match_set(self.CREATABLES) and self._prev.text 2055 extended = self._match_text_seq("EXTENDED") 2056 this = self._parse_table(schema=True) 2057 properties = self._parse_properties() 2058 expressions = properties.expressions if properties else None 2059 return self.expression( 2060 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2061 ) 2062 2063 def _parse_insert(self) -> exp.Insert: 2064 comments = ensure_list(self._prev_comments) 2065 hint = self._parse_hint() 2066 overwrite = self._match(TokenType.OVERWRITE) 2067 ignore = self._match(TokenType.IGNORE) 2068 local = self._match_text_seq("LOCAL") 2069 alternative = None 2070 2071 if self._match_text_seq("DIRECTORY"): 2072 this: t.Optional[exp.Expression] = self.expression( 2073 exp.Directory, 2074 this=self._parse_var_or_string(), 2075 local=local, 2076 row_format=self._parse_row_format(match_row=True), 2077 ) 2078 else: 2079 if self._match(TokenType.OR): 2080 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2081 2082 self._match(TokenType.INTO) 2083 comments += ensure_list(self._prev_comments) 2084 self._match(TokenType.TABLE) 2085 this = self._parse_table(schema=True) 2086 2087 returning = self._parse_returning() 2088 2089 return self.expression( 2090 exp.Insert, 2091 comments=comments, 2092 hint=hint, 2093 this=this, 2094 by_name=self._match_text_seq("BY", "NAME"), 2095 exists=self._parse_exists(), 2096 partition=self._parse_partition(), 2097 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2098 and self._parse_conjunction(), 2099 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2100 conflict=self._parse_on_conflict(), 2101 returning=returning or self._parse_returning(), 2102 overwrite=overwrite, 2103 alternative=alternative, 2104 ignore=ignore, 2105 ) 2106 2107 def _parse_kill(self) -> exp.Kill: 2108 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2109 2110 return self.expression( 2111 exp.Kill, 2112 this=self._parse_primary(), 2113 kind=kind, 2114 ) 2115 2116 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2117 conflict = self._match_text_seq("ON", "CONFLICT") 2118 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2119 2120 if not conflict and not duplicate: 2121 return None 2122 2123 conflict_keys = None 2124 constraint = None 2125 2126 if conflict: 2127 if self._match_text_seq("ON", "CONSTRAINT"): 2128 constraint = self._parse_id_var() 2129 elif self._match(TokenType.L_PAREN): 2130 conflict_keys = self._parse_csv(self._parse_id_var) 2131 self._match_r_paren() 2132 2133 action = 
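# --- Editor's aside (illustrative sketch, not part of sqlglot.parser) -------------
# _parse_on_conflict normalizes both spellings: Postgres/SQLite "ON CONFLICT ... DO
# UPDATE SET ..." and MySQL "ON DUPLICATE KEY UPDATE ..." both land in a single
# exp.OnConflict node. Assumes a recent sqlglot release; the asserts show expected
# shapes:

import sqlglot
from sqlglot import exp

sql = "INSERT INTO t (x) VALUES (1) ON CONFLICT (x) DO UPDATE SET x = 2"
conflict = sqlglot.parse_one(sql, read="postgres").find(exp.OnConflict)
assert conflict is not None and conflict.args.get("conflict_keys")
# --- end aside ---------------------------------------------------------------------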
self._parse_var_from_options(self.CONFLICT_ACTIONS) 2134 if self._prev.token_type == TokenType.UPDATE: 2135 self._match(TokenType.SET) 2136 expressions = self._parse_csv(self._parse_equality) 2137 else: 2138 expressions = None 2139 2140 return self.expression( 2141 exp.OnConflict, 2142 duplicate=duplicate, 2143 expressions=expressions, 2144 action=action, 2145 conflict_keys=conflict_keys, 2146 constraint=constraint, 2147 ) 2148 2149 def _parse_returning(self) -> t.Optional[exp.Returning]: 2150 if not self._match(TokenType.RETURNING): 2151 return None 2152 return self.expression( 2153 exp.Returning, 2154 expressions=self._parse_csv(self._parse_expression), 2155 into=self._match(TokenType.INTO) and self._parse_table_part(), 2156 ) 2157 2158 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2159 if not self._match(TokenType.FORMAT): 2160 return None 2161 return self._parse_row_format() 2162 2163 def _parse_row_format( 2164 self, match_row: bool = False 2165 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2166 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2167 return None 2168 2169 if self._match_text_seq("SERDE"): 2170 this = self._parse_string() 2171 2172 serde_properties = None 2173 if self._match(TokenType.SERDE_PROPERTIES): 2174 serde_properties = self.expression( 2175 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2176 ) 2177 2178 return self.expression( 2179 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2180 ) 2181 2182 self._match_text_seq("DELIMITED") 2183 2184 kwargs = {} 2185 2186 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2187 kwargs["fields"] = self._parse_string() 2188 if self._match_text_seq("ESCAPED", "BY"): 2189 kwargs["escaped"] = self._parse_string() 2190 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2191 kwargs["collection_items"] = self._parse_string() 2192 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2193 kwargs["map_keys"] = self._parse_string() 2194 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2195 kwargs["lines"] = self._parse_string() 2196 if self._match_text_seq("NULL", "DEFINED", "AS"): 2197 kwargs["null"] = self._parse_string() 2198 2199 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2200 2201 def _parse_load(self) -> exp.LoadData | exp.Command: 2202 if self._match_text_seq("DATA"): 2203 local = self._match_text_seq("LOCAL") 2204 self._match_text_seq("INPATH") 2205 inpath = self._parse_string() 2206 overwrite = self._match(TokenType.OVERWRITE) 2207 self._match_pair(TokenType.INTO, TokenType.TABLE) 2208 2209 return self.expression( 2210 exp.LoadData, 2211 this=self._parse_table(schema=True), 2212 local=local, 2213 overwrite=overwrite, 2214 inpath=inpath, 2215 partition=self._parse_partition(), 2216 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2217 serde=self._match_text_seq("SERDE") and self._parse_string(), 2218 ) 2219 return self._parse_as_command(self._prev) 2220 2221 def _parse_delete(self) -> exp.Delete: 2222 # This handles MySQL's "Multiple-Table Syntax" 2223 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2224 tables = None 2225 comments = self._prev_comments 2226 if not self._match(TokenType.FROM, advance=False): 2227 tables = self._parse_csv(self._parse_table) or None 2228 2229 returning = self._parse_returning() 2230 2231 return self.expression( 2232 exp.Delete, 2233 
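# --- Editor's aside (illustrative sketch, not part of sqlglot.parser) -------------
# _parse_delete collects the optional pieces (multi-table list, USING, WHERE,
# RETURNING, LIMIT) into one exp.Delete. Assumes a recent sqlglot release; the
# asserts show expected shapes:

import sqlglot
from sqlglot import exp

delete = sqlglot.parse_one("DELETE FROM t WHERE x = 1 RETURNING x", read="postgres")
assert isinstance(delete, exp.Delete)
assert delete.args.get("where") is not None and delete.args.get("returning") is not None
# --- end aside ---------------------------------------------------------------------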
comments=comments, 2234 tables=tables, 2235 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2236 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2237 where=self._parse_where(), 2238 returning=returning or self._parse_returning(), 2239 limit=self._parse_limit(), 2240 ) 2241 2242 def _parse_update(self) -> exp.Update: 2243 comments = self._prev_comments 2244 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2245 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2246 returning = self._parse_returning() 2247 return self.expression( 2248 exp.Update, 2249 comments=comments, 2250 **{ # type: ignore 2251 "this": this, 2252 "expressions": expressions, 2253 "from": self._parse_from(joins=True), 2254 "where": self._parse_where(), 2255 "returning": returning or self._parse_returning(), 2256 "order": self._parse_order(), 2257 "limit": self._parse_limit(), 2258 }, 2259 ) 2260 2261 def _parse_uncache(self) -> exp.Uncache: 2262 if not self._match(TokenType.TABLE): 2263 self.raise_error("Expecting TABLE after UNCACHE") 2264 2265 return self.expression( 2266 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2267 ) 2268 2269 def _parse_cache(self) -> exp.Cache: 2270 lazy = self._match_text_seq("LAZY") 2271 self._match(TokenType.TABLE) 2272 table = self._parse_table(schema=True) 2273 2274 options = [] 2275 if self._match_text_seq("OPTIONS"): 2276 self._match_l_paren() 2277 k = self._parse_string() 2278 self._match(TokenType.EQ) 2279 v = self._parse_string() 2280 options = [k, v] 2281 self._match_r_paren() 2282 2283 self._match(TokenType.ALIAS) 2284 return self.expression( 2285 exp.Cache, 2286 this=table, 2287 lazy=lazy, 2288 options=options, 2289 expression=self._parse_select(nested=True), 2290 ) 2291 2292 def _parse_partition(self) -> t.Optional[exp.Partition]: 2293 if not self._match(TokenType.PARTITION): 2294 return None 2295 2296 return self.expression( 2297 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2298 ) 2299 2300 def _parse_value(self) -> exp.Tuple: 2301 if self._match(TokenType.L_PAREN): 2302 expressions = self._parse_csv(self._parse_expression) 2303 self._match_r_paren() 2304 return self.expression(exp.Tuple, expressions=expressions) 2305 2306 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
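# --- Editor's aside (illustrative sketch, not part of sqlglot.parser) -------------
# _parse_value produces one exp.Tuple per row; a derived VALUES table is then a list
# of such tuples under exp.Values, usually wrapped with an alias. Assumes a recent
# sqlglot release; the asserts show expected shapes:

import sqlglot
from sqlglot import exp

values = sqlglot.parse_one("SELECT * FROM (VALUES (1), (2)) AS t(a)").find(exp.Values)
assert values is not None and len(values.expressions) == 2
assert all(isinstance(row, exp.Tuple) for row in values.expressions)
# --- end aside ---------------------------------------------------------------------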
2307 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2308 2309 def _parse_projections(self) -> t.List[exp.Expression]: 2310 return self._parse_expressions() 2311 2312 def _parse_select( 2313 self, 2314 nested: bool = False, 2315 table: bool = False, 2316 parse_subquery_alias: bool = True, 2317 parse_set_operation: bool = True, 2318 ) -> t.Optional[exp.Expression]: 2319 cte = self._parse_with() 2320 2321 if cte: 2322 this = self._parse_statement() 2323 2324 if not this: 2325 self.raise_error("Failed to parse any statement following CTE") 2326 return cte 2327 2328 if "with" in this.arg_types: 2329 this.set("with", cte) 2330 else: 2331 self.raise_error(f"{this.key} does not support CTE") 2332 this = cte 2333 2334 return this 2335 2336 # duckdb supports leading with FROM x 2337 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2338 2339 if self._match(TokenType.SELECT): 2340 comments = self._prev_comments 2341 2342 hint = self._parse_hint() 2343 all_ = self._match(TokenType.ALL) 2344 distinct = self._match_set(self.DISTINCT_TOKENS) 2345 2346 kind = ( 2347 self._match(TokenType.ALIAS) 2348 and self._match_texts(("STRUCT", "VALUE")) 2349 and self._prev.text.upper() 2350 ) 2351 2352 if distinct: 2353 distinct = self.expression( 2354 exp.Distinct, 2355 on=self._parse_value() if self._match(TokenType.ON) else None, 2356 ) 2357 2358 if all_ and distinct: 2359 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2360 2361 limit = self._parse_limit(top=True) 2362 projections = self._parse_projections() 2363 2364 this = self.expression( 2365 exp.Select, 2366 kind=kind, 2367 hint=hint, 2368 distinct=distinct, 2369 expressions=projections, 2370 limit=limit, 2371 ) 2372 this.comments = comments 2373 2374 into = self._parse_into() 2375 if into: 2376 this.set("into", into) 2377 2378 if not from_: 2379 from_ = self._parse_from() 2380 2381 if from_: 2382 this.set("from", from_) 2383 2384 this = self._parse_query_modifiers(this) 2385 elif (table or nested) and self._match(TokenType.L_PAREN): 2386 if self._match(TokenType.PIVOT): 2387 this = self._parse_simplified_pivot() 2388 elif self._match(TokenType.FROM): 2389 this = exp.select("*").from_( 2390 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2391 ) 2392 else: 2393 this = ( 2394 self._parse_table() 2395 if table 2396 else self._parse_select(nested=True, parse_set_operation=False) 2397 ) 2398 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2399 2400 self._match_r_paren() 2401 2402 # We return early here so that the UNION isn't attached to the subquery by the 2403 # following call to _parse_set_operations, but instead becomes the parent node 2404 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2405 elif self._match(TokenType.VALUES, advance=False): 2406 this = self._parse_derived_table_values() 2407 elif from_: 2408 this = exp.select("*").from_(from_.this, copy=False) 2409 else: 2410 this = None 2411 2412 if parse_set_operation: 2413 return self._parse_set_operations(this) 2414 return this 2415 2416 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2417 if not skip_with_token and not self._match(TokenType.WITH): 2418 return None 2419 2420 comments = self._prev_comments 2421 recursive = self._match(TokenType.RECURSIVE) 2422 2423 expressions = [] 2424 while True: 2425 expressions.append(self._parse_cte()) 2426 2427 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2428 break 2429 else: 
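# --- Editor's aside (illustrative sketch, not part of sqlglot.parser) -------------
# _parse_with collects the comma-separated CTEs into one exp.With, which
# _parse_select then attaches to the statement that follows. Assumes a recent
# sqlglot release; the asserts show expected shapes:

import sqlglot
from sqlglot import exp

select = sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
with_ = select.args["with"]
assert isinstance(with_, exp.With) and len(with_.expressions) == 1
assert isinstance(with_.expressions[0], exp.CTE)
# --- end aside ---------------------------------------------------------------------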
2430 self._match(TokenType.WITH) 2431 2432 return self.expression( 2433 exp.With, comments=comments, expressions=expressions, recursive=recursive 2434 ) 2435 2436 def _parse_cte(self) -> exp.CTE: 2437 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2438 if not alias or not alias.this: 2439 self.raise_error("Expected CTE to have alias") 2440 2441 self._match(TokenType.ALIAS) 2442 return self.expression( 2443 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2444 ) 2445 2446 def _parse_table_alias( 2447 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2448 ) -> t.Optional[exp.TableAlias]: 2449 any_token = self._match(TokenType.ALIAS) 2450 alias = ( 2451 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2452 or self._parse_string_as_identifier() 2453 ) 2454 2455 index = self._index 2456 if self._match(TokenType.L_PAREN): 2457 columns = self._parse_csv(self._parse_function_parameter) 2458 self._match_r_paren() if columns else self._retreat(index) 2459 else: 2460 columns = None 2461 2462 if not alias and not columns: 2463 return None 2464 2465 return self.expression(exp.TableAlias, this=alias, columns=columns) 2466 2467 def _parse_subquery( 2468 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2469 ) -> t.Optional[exp.Subquery]: 2470 if not this: 2471 return None 2472 2473 return self.expression( 2474 exp.Subquery, 2475 this=this, 2476 pivots=self._parse_pivots(), 2477 alias=self._parse_table_alias() if parse_alias else None, 2478 ) 2479 2480 def _implicit_unnests_to_explicit(self, this: E) -> E: 2481 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2482 2483 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2484 for i, join in enumerate(this.args.get("joins") or []): 2485 table = join.this 2486 normalized_table = table.copy() 2487 normalized_table.meta["maybe_column"] = True 2488 normalized_table = _norm(normalized_table, dialect=self.dialect) 2489 2490 if isinstance(table, exp.Table) and not join.args.get("on"): 2491 if normalized_table.parts[0].name in refs: 2492 table_as_column = table.to_column() 2493 unnest = exp.Unnest(expressions=[table_as_column]) 2494 2495 # Table.to_column creates a parent Alias node that we want to convert to 2496 # a TableAlias and attach to the Unnest, so it matches the parser's output 2497 if isinstance(table.args.get("alias"), exp.TableAlias): 2498 table_as_column.replace(table_as_column.this) 2499 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2500 2501 table.replace(unnest) 2502 2503 refs.add(normalized_table.alias_or_name) 2504 2505 return this 2506 2507 def _parse_query_modifiers( 2508 self, this: t.Optional[exp.Expression] 2509 ) -> t.Optional[exp.Expression]: 2510 if isinstance(this, (exp.Query, exp.Table)): 2511 for join in iter(self._parse_join, None): 2512 this.append("joins", join) 2513 for lateral in iter(self._parse_lateral, None): 2514 this.append("laterals", lateral) 2515 2516 while True: 2517 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2518 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2519 key, expression = parser(self) 2520 2521 if expression: 2522 this.set(key, expression) 2523 if key == "limit": 2524 offset = expression.args.pop("offset", None) 2525 2526 if offset: 2527 offset = exp.Offset(expression=offset) 2528 this.set("offset", offset) 2529 2530 limit_by_expressions = expression.expressions 2531 expression.set("expressions", 
None) 2532 offset.set("expressions", limit_by_expressions) 2533 continue 2534 break 2535 2536 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2537 this = self._implicit_unnests_to_explicit(this) 2538 2539 return this 2540 2541 def _parse_hint(self) -> t.Optional[exp.Hint]: 2542 if self._match(TokenType.HINT): 2543 hints = [] 2544 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2545 hints.extend(hint) 2546 2547 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2548 self.raise_error("Expected */ after HINT") 2549 2550 return self.expression(exp.Hint, expressions=hints) 2551 2552 return None 2553 2554 def _parse_into(self) -> t.Optional[exp.Into]: 2555 if not self._match(TokenType.INTO): 2556 return None 2557 2558 temp = self._match(TokenType.TEMPORARY) 2559 unlogged = self._match_text_seq("UNLOGGED") 2560 self._match(TokenType.TABLE) 2561 2562 return self.expression( 2563 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2564 ) 2565 2566 def _parse_from( 2567 self, joins: bool = False, skip_from_token: bool = False 2568 ) -> t.Optional[exp.From]: 2569 if not skip_from_token and not self._match(TokenType.FROM): 2570 return None 2571 2572 return self.expression( 2573 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2574 ) 2575 2576 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2577 if not self._match(TokenType.MATCH_RECOGNIZE): 2578 return None 2579 2580 self._match_l_paren() 2581 2582 partition = self._parse_partition_by() 2583 order = self._parse_order() 2584 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2585 2586 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2587 rows = exp.var("ONE ROW PER MATCH") 2588 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2589 text = "ALL ROWS PER MATCH" 2590 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2591 text += " SHOW EMPTY MATCHES" 2592 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2593 text += " OMIT EMPTY MATCHES" 2594 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2595 text += " WITH UNMATCHED ROWS" 2596 rows = exp.var(text) 2597 else: 2598 rows = None 2599 2600 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2601 text = "AFTER MATCH SKIP" 2602 if self._match_text_seq("PAST", "LAST", "ROW"): 2603 text += " PAST LAST ROW" 2604 elif self._match_text_seq("TO", "NEXT", "ROW"): 2605 text += " TO NEXT ROW" 2606 elif self._match_text_seq("TO", "FIRST"): 2607 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2608 elif self._match_text_seq("TO", "LAST"): 2609 text += f" TO LAST {self._advance_any().text}" # type: ignore 2610 after = exp.var(text) 2611 else: 2612 after = None 2613 2614 if self._match_text_seq("PATTERN"): 2615 self._match_l_paren() 2616 2617 if not self._curr: 2618 self.raise_error("Expecting )", self._curr) 2619 2620 paren = 1 2621 start = self._curr 2622 2623 while self._curr and paren > 0: 2624 if self._curr.token_type == TokenType.L_PAREN: 2625 paren += 1 2626 if self._curr.token_type == TokenType.R_PAREN: 2627 paren -= 1 2628 2629 end = self._prev 2630 self._advance() 2631 2632 if paren > 0: 2633 self.raise_error("Expecting )", self._curr) 2634 2635 pattern = exp.var(self._find_sql(start, end)) 2636 else: 2637 pattern = None 2638 2639 define = ( 2640 self._parse_csv(self._parse_name_as_expression) 2641 if self._match_text_seq("DEFINE") 2642 else None 2643 ) 2644 2645 self._match_r_paren() 2646 2647 return 
self.expression( 2648 exp.MatchRecognize, 2649 partition_by=partition, 2650 order=order, 2651 measures=measures, 2652 rows=rows, 2653 after=after, 2654 pattern=pattern, 2655 define=define, 2656 alias=self._parse_table_alias(), 2657 ) 2658 2659 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2660 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2661 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2662 cross_apply = False 2663 2664 if cross_apply is not None: 2665 this = self._parse_select(table=True) 2666 view = None 2667 outer = None 2668 elif self._match(TokenType.LATERAL): 2669 this = self._parse_select(table=True) 2670 view = self._match(TokenType.VIEW) 2671 outer = self._match(TokenType.OUTER) 2672 else: 2673 return None 2674 2675 if not this: 2676 this = ( 2677 self._parse_unnest() 2678 or self._parse_function() 2679 or self._parse_id_var(any_token=False) 2680 ) 2681 2682 while self._match(TokenType.DOT): 2683 this = exp.Dot( 2684 this=this, 2685 expression=self._parse_function() or self._parse_id_var(any_token=False), 2686 ) 2687 2688 if view: 2689 table = self._parse_id_var(any_token=False) 2690 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2691 table_alias: t.Optional[exp.TableAlias] = self.expression( 2692 exp.TableAlias, this=table, columns=columns 2693 ) 2694 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2695 # We move the alias from the lateral's child node to the lateral itself 2696 table_alias = this.args["alias"].pop() 2697 else: 2698 table_alias = self._parse_table_alias() 2699 2700 return self.expression( 2701 exp.Lateral, 2702 this=this, 2703 view=view, 2704 outer=outer, 2705 alias=table_alias, 2706 cross_apply=cross_apply, 2707 ) 2708 2709 def _parse_join_parts( 2710 self, 2711 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2712 return ( 2713 self._match_set(self.JOIN_METHODS) and self._prev, 2714 self._match_set(self.JOIN_SIDES) and self._prev, 2715 self._match_set(self.JOIN_KINDS) and self._prev, 2716 ) 2717 2718 def _parse_join( 2719 self, skip_join_token: bool = False, parse_bracket: bool = False 2720 ) -> t.Optional[exp.Join]: 2721 if self._match(TokenType.COMMA): 2722 return self.expression(exp.Join, this=self._parse_table()) 2723 2724 index = self._index 2725 method, side, kind = self._parse_join_parts() 2726 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2727 join = self._match(TokenType.JOIN) 2728 2729 if not skip_join_token and not join: 2730 self._retreat(index) 2731 kind = None 2732 method = None 2733 side = None 2734 2735 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2736 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2737 2738 if not skip_join_token and not join and not outer_apply and not cross_apply: 2739 return None 2740 2741 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2742 2743 if method: 2744 kwargs["method"] = method.text 2745 if side: 2746 kwargs["side"] = side.text 2747 if kind: 2748 kwargs["kind"] = kind.text 2749 if hint: 2750 kwargs["hint"] = hint 2751 2752 if self._match(TokenType.ON): 2753 kwargs["on"] = self._parse_conjunction() 2754 elif self._match(TokenType.USING): 2755 kwargs["using"] = self._parse_wrapped_id_vars() 2756 elif not (kind and kind.token_type == TokenType.CROSS): 2757 index = self._index 2758 join = self._parse_join() 2759 2760 if join and self._match(TokenType.ON): 2761 
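# --- Editor's aside (illustrative sketch, not part of sqlglot.parser) -------------
# _parse_join_parts splits a join introduction into independent method/side/kind
# tokens, so "NATURAL LEFT OUTER JOIN", a bare comma, and the APPLY variants all
# normalize into exp.Join nodes. Assumes a recent sqlglot release; the asserts show
# expected shapes:

import sqlglot
from sqlglot import exp

join = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id").find(exp.Join)
assert join is not None and join.side == "LEFT"
assert join.args.get("on") is not None
# --- end aside ---------------------------------------------------------------------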
kwargs["on"] = self._parse_conjunction() 2762 elif join and self._match(TokenType.USING): 2763 kwargs["using"] = self._parse_wrapped_id_vars() 2764 else: 2765 join = None 2766 self._retreat(index) 2767 2768 kwargs["this"].set("joins", [join] if join else None) 2769 2770 comments = [c for token in (method, side, kind) if token for c in token.comments] 2771 return self.expression(exp.Join, comments=comments, **kwargs) 2772 2773 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2774 this = self._parse_conjunction() 2775 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2776 return this 2777 2778 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2779 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2780 2781 return this 2782 2783 def _parse_index( 2784 self, 2785 index: t.Optional[exp.Expression] = None, 2786 ) -> t.Optional[exp.Index]: 2787 if index: 2788 unique = None 2789 primary = None 2790 amp = None 2791 2792 self._match(TokenType.ON) 2793 self._match(TokenType.TABLE) # hive 2794 table = self._parse_table_parts(schema=True) 2795 else: 2796 unique = self._match(TokenType.UNIQUE) 2797 primary = self._match_text_seq("PRIMARY") 2798 amp = self._match_text_seq("AMP") 2799 2800 if not self._match(TokenType.INDEX): 2801 return None 2802 2803 index = self._parse_id_var() 2804 table = None 2805 2806 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2807 2808 if self._match(TokenType.L_PAREN, advance=False): 2809 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2810 else: 2811 columns = None 2812 2813 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2814 2815 return self.expression( 2816 exp.Index, 2817 this=index, 2818 table=table, 2819 using=using, 2820 columns=columns, 2821 unique=unique, 2822 primary=primary, 2823 amp=amp, 2824 include=include, 2825 partition_by=self._parse_partition_by(), 2826 where=self._parse_where(), 2827 ) 2828 2829 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2830 hints: t.List[exp.Expression] = [] 2831 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2832 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2833 hints.append( 2834 self.expression( 2835 exp.WithTableHint, 2836 expressions=self._parse_csv( 2837 lambda: self._parse_function() or self._parse_var(any_token=True) 2838 ), 2839 ) 2840 ) 2841 self._match_r_paren() 2842 else: 2843 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2844 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2845 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2846 2847 self._match_texts(("INDEX", "KEY")) 2848 if self._match(TokenType.FOR): 2849 hint.set("target", self._advance_any() and self._prev.text.upper()) 2850 2851 hint.set("expressions", self._parse_wrapped_id_vars()) 2852 hints.append(hint) 2853 2854 return hints or None 2855 2856 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2857 return ( 2858 (not schema and self._parse_function(optional_parens=False)) 2859 or self._parse_id_var(any_token=False) 2860 or self._parse_string_as_identifier() 2861 or self._parse_placeholder() 2862 ) 2863 2864 def _parse_table_parts( 2865 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 2866 ) -> exp.Table: 2867 catalog = None 2868 db = None 2869 table: t.Optional[exp.Expression | str] = 
self._parse_table_part(schema=schema) 2870 2871 while self._match(TokenType.DOT): 2872 if catalog: 2873 # This allows nesting the table in arbitrarily many dot expressions if needed 2874 table = self.expression( 2875 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2876 ) 2877 else: 2878 catalog = db 2879 db = table 2880 # "" used for tsql FROM a..b case 2881 table = self._parse_table_part(schema=schema) or "" 2882 2883 if ( 2884 wildcard 2885 and self._is_connected() 2886 and (isinstance(table, exp.Identifier) or not table) 2887 and self._match(TokenType.STAR) 2888 ): 2889 if isinstance(table, exp.Identifier): 2890 table.args["this"] += "*" 2891 else: 2892 table = exp.Identifier(this="*") 2893 2894 if is_db_reference: 2895 catalog = db 2896 db = table 2897 table = None 2898 2899 if not table and not is_db_reference: 2900 self.raise_error(f"Expected table name but got {self._curr}") 2901 if not db and is_db_reference: 2902 self.raise_error(f"Expected database name but got {self._curr}") 2903 2904 return self.expression( 2905 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2906 ) 2907 2908 def _parse_table( 2909 self, 2910 schema: bool = False, 2911 joins: bool = False, 2912 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2913 parse_bracket: bool = False, 2914 is_db_reference: bool = False, 2915 ) -> t.Optional[exp.Expression]: 2916 lateral = self._parse_lateral() 2917 if lateral: 2918 return lateral 2919 2920 unnest = self._parse_unnest() 2921 if unnest: 2922 return unnest 2923 2924 values = self._parse_derived_table_values() 2925 if values: 2926 return values 2927 2928 subquery = self._parse_select(table=True) 2929 if subquery: 2930 if not subquery.args.get("pivots"): 2931 subquery.set("pivots", self._parse_pivots()) 2932 return subquery 2933 2934 bracket = parse_bracket and self._parse_bracket(None) 2935 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2936 2937 only = self._match(TokenType.ONLY) 2938 2939 this = t.cast( 2940 exp.Expression, 2941 bracket 2942 or self._parse_bracket( 2943 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 2944 ), 2945 ) 2946 2947 if only: 2948 this.set("only", only) 2949 2950 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 2951 self._match_text_seq("*") 2952 2953 if schema: 2954 return self._parse_schema(this=this) 2955 2956 version = self._parse_version() 2957 2958 if version: 2959 this.set("version", version) 2960 2961 if self.dialect.ALIAS_POST_TABLESAMPLE: 2962 table_sample = self._parse_table_sample() 2963 2964 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2965 if alias: 2966 this.set("alias", alias) 2967 2968 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2969 return self.expression( 2970 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2971 ) 2972 2973 this.set("hints", self._parse_table_hints()) 2974 2975 if not this.args.get("pivots"): 2976 this.set("pivots", self._parse_pivots()) 2977 2978 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2979 table_sample = self._parse_table_sample() 2980 2981 if table_sample: 2982 table_sample.set("this", this) 2983 this = table_sample 2984 2985 if joins: 2986 for join in iter(self._parse_join, None): 2987 this.append("joins", join) 2988 2989 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2990 this.set("ordinality", True) 2991 this.set("alias", self._parse_table_alias()) 2992 2993 
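# At this point `this` carries everything parsed for the table reference:
# version, alias, hints, pivots, sample and any requested joins.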
return this 2994 2995 def _parse_version(self) -> t.Optional[exp.Version]: 2996 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2997 this = "TIMESTAMP" 2998 elif self._match(TokenType.VERSION_SNAPSHOT): 2999 this = "VERSION" 3000 else: 3001 return None 3002 3003 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3004 kind = self._prev.text.upper() 3005 start = self._parse_bitwise() 3006 self._match_texts(("TO", "AND")) 3007 end = self._parse_bitwise() 3008 expression: t.Optional[exp.Expression] = self.expression( 3009 exp.Tuple, expressions=[start, end] 3010 ) 3011 elif self._match_text_seq("CONTAINED", "IN"): 3012 kind = "CONTAINED IN" 3013 expression = self.expression( 3014 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3015 ) 3016 elif self._match(TokenType.ALL): 3017 kind = "ALL" 3018 expression = None 3019 else: 3020 self._match_text_seq("AS", "OF") 3021 kind = "AS OF" 3022 expression = self._parse_type() 3023 3024 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3025 3026 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3027 if not self._match(TokenType.UNNEST): 3028 return None 3029 3030 expressions = self._parse_wrapped_csv(self._parse_equality) 3031 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3032 3033 alias = self._parse_table_alias() if with_alias else None 3034 3035 if alias: 3036 if self.dialect.UNNEST_COLUMN_ONLY: 3037 if alias.args.get("columns"): 3038 self.raise_error("Unexpected extra column alias in unnest.") 3039 3040 alias.set("columns", [alias.this]) 3041 alias.set("this", None) 3042 3043 columns = alias.args.get("columns") or [] 3044 if offset and len(expressions) < len(columns): 3045 offset = columns.pop() 3046 3047 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3048 self._match(TokenType.ALIAS) 3049 offset = self._parse_id_var( 3050 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3051 ) or exp.to_identifier("offset") 3052 3053 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3054 3055 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3056 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3057 if not is_derived and not self._match_text_seq("VALUES"): 3058 return None 3059 3060 expressions = self._parse_csv(self._parse_value) 3061 alias = self._parse_table_alias() 3062 3063 if is_derived: 3064 self._match_r_paren() 3065 3066 return self.expression( 3067 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3068 ) 3069 3070 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3071 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3072 as_modifier and self._match_text_seq("USING", "SAMPLE") 3073 ): 3074 return None 3075 3076 bucket_numerator = None 3077 bucket_denominator = None 3078 bucket_field = None 3079 percent = None 3080 size = None 3081 seed = None 3082 3083 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3084 matched_l_paren = self._match(TokenType.L_PAREN) 3085 3086 if self.TABLESAMPLE_CSV: 3087 num = None 3088 expressions = self._parse_csv(self._parse_primary) 3089 else: 3090 expressions = None 3091 num = ( 3092 self._parse_factor() 3093 if self._match(TokenType.NUMBER, advance=False) 3094 else self._parse_primary() or self._parse_placeholder() 3095 ) 3096 3097 if self._match_text_seq("BUCKET"): 3098 bucket_numerator = self._parse_number() 3099 self._match_text_seq("OUT", "OF") 
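# Hive-style bucket sampling, e.g. TABLESAMPLE (BUCKET 3 OUT OF 16 ON id): the
# numerator was parsed above; the denominator and bucketing column follow.
# Illustrative usage sketch (assumes the hive dialect; not part of the source):
#   import sqlglot
#   sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (BUCKET 3 OUT OF 16 ON id)", read="hive")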
3100 bucket_denominator = self._parse_number() 3101 self._match(TokenType.ON) 3102 bucket_field = self._parse_field() 3103 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3104 percent = num 3105 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3106 size = num 3107 else: 3108 percent = num 3109 3110 if matched_l_paren: 3111 self._match_r_paren() 3112 3113 if self._match(TokenType.L_PAREN): 3114 method = self._parse_var(upper=True) 3115 seed = self._match(TokenType.COMMA) and self._parse_number() 3116 self._match_r_paren() 3117 elif self._match_texts(("SEED", "REPEATABLE")): 3118 seed = self._parse_wrapped(self._parse_number) 3119 3120 return self.expression( 3121 exp.TableSample, 3122 expressions=expressions, 3123 method=method, 3124 bucket_numerator=bucket_numerator, 3125 bucket_denominator=bucket_denominator, 3126 bucket_field=bucket_field, 3127 percent=percent, 3128 size=size, 3129 seed=seed, 3130 ) 3131 3132 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3133 return list(iter(self._parse_pivot, None)) or None 3134 3135 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 3136 return list(iter(self._parse_join, None)) or None 3137 3138 # https://duckdb.org/docs/sql/statements/pivot 3139 def _parse_simplified_pivot(self) -> exp.Pivot: 3140 def _parse_on() -> t.Optional[exp.Expression]: 3141 this = self._parse_bitwise() 3142 return self._parse_in(this) if self._match(TokenType.IN) else this 3143 3144 this = self._parse_table() 3145 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3146 using = self._match(TokenType.USING) and self._parse_csv( 3147 lambda: self._parse_alias(self._parse_function()) 3148 ) 3149 group = self._parse_group() 3150 return self.expression( 3151 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3152 ) 3153 3154 def _parse_pivot_in(self) -> exp.In: 3155 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3156 this = self._parse_conjunction() 3157 3158 self._match(TokenType.ALIAS) 3159 alias = self._parse_field() 3160 if alias: 3161 return self.expression(exp.PivotAlias, this=this, alias=alias) 3162 3163 return this 3164 3165 value = self._parse_column() 3166 3167 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3168 self.raise_error("Expecting IN (") 3169 3170 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3171 3172 self._match_r_paren() 3173 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3174 3175 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3176 index = self._index 3177 include_nulls = None 3178 3179 if self._match(TokenType.PIVOT): 3180 unpivot = False 3181 elif self._match(TokenType.UNPIVOT): 3182 unpivot = True 3183 3184 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3185 if self._match_text_seq("INCLUDE", "NULLS"): 3186 include_nulls = True 3187 elif self._match_text_seq("EXCLUDE", "NULLS"): 3188 include_nulls = False 3189 else: 3190 return None 3191 3192 expressions = [] 3193 3194 if not self._match(TokenType.L_PAREN): 3195 self._retreat(index) 3196 return None 3197 3198 if unpivot: 3199 expressions = self._parse_csv(self._parse_column) 3200 else: 3201 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3202 3203 if not expressions: 3204 self.raise_error("Failed to parse PIVOT's aggregation list") 3205 3206 if not self._match(TokenType.FOR): 3207 self.raise_error("Expecting FOR") 3208 3209
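# e.g. PIVOT (SUM(sales) FOR quarter IN ('Q1' AS q1, 'Q2')): the aggregations
# were parsed above; _parse_pivot_in now consumes the FOR column and its IN list.
# Illustrative sketch (assumes the snowflake dialect; not part of the source):
#   import sqlglot
#   sqlglot.parse_one("SELECT * FROM t PIVOT (SUM(v) FOR q IN ('a', 'b'))", read="snowflake")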
field = self._parse_pivot_in() 3210 3211 self._match_r_paren() 3212 3213 pivot = self.expression( 3214 exp.Pivot, 3215 expressions=expressions, 3216 field=field, 3217 unpivot=unpivot, 3218 include_nulls=include_nulls, 3219 ) 3220 3221 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3222 pivot.set("alias", self._parse_table_alias()) 3223 3224 if not unpivot: 3225 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3226 3227 columns: t.List[exp.Expression] = [] 3228 for fld in pivot.args["field"].expressions: 3229 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3230 for name in names: 3231 if self.PREFIXED_PIVOT_COLUMNS: 3232 name = f"{name}_{field_name}" if name else field_name 3233 else: 3234 name = f"{field_name}_{name}" if name else field_name 3235 3236 columns.append(exp.to_identifier(name)) 3237 3238 pivot.set("columns", columns) 3239 3240 return pivot 3241 3242 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3243 return [agg.alias for agg in aggregations] 3244 3245 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3246 if not skip_where_token and not self._match(TokenType.PREWHERE): 3247 return None 3248 3249 return self.expression( 3250 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3251 ) 3252 3253 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3254 if not skip_where_token and not self._match(TokenType.WHERE): 3255 return None 3256 3257 return self.expression( 3258 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3259 ) 3260 3261 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3262 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3263 return None 3264 3265 elements = defaultdict(list) 3266 3267 if self._match(TokenType.ALL): 3268 return self.expression(exp.Group, all=True) 3269 3270 while True: 3271 expressions = self._parse_csv(self._parse_conjunction) 3272 if expressions: 3273 elements["expressions"].extend(expressions) 3274 3275 grouping_sets = self._parse_grouping_sets() 3276 if grouping_sets: 3277 elements["grouping_sets"].extend(grouping_sets) 3278 3279 rollup = None 3280 cube = None 3281 totals = None 3282 3283 index = self._index 3284 with_ = self._match(TokenType.WITH) 3285 if self._match(TokenType.ROLLUP): 3286 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3287 elements["rollup"].extend(ensure_list(rollup)) 3288 3289 if self._match(TokenType.CUBE): 3290 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3291 elements["cube"].extend(ensure_list(cube)) 3292 3293 if self._match_text_seq("TOTALS"): 3294 totals = True 3295 elements["totals"] = True # type: ignore 3296 3297 if not (grouping_sets or rollup or cube or totals): 3298 if with_: 3299 self._retreat(index) 3300 break 3301 3302 return self.expression(exp.Group, **elements) # type: ignore 3303 3304 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3305 if not self._match(TokenType.GROUPING_SETS): 3306 return None 3307 3308 return self._parse_wrapped_csv(self._parse_grouping_set) 3309 3310 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3311 if self._match(TokenType.L_PAREN): 3312 grouping_set = self._parse_csv(self._parse_column) 3313 self._match_r_paren() 3314 return self.expression(exp.Tuple, expressions=grouping_set) 3315 3316 return self._parse_column() 3317 3318 def 
_parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3319 if not skip_having_token and not self._match(TokenType.HAVING): 3320 return None 3321 return self.expression(exp.Having, this=self._parse_conjunction()) 3322 3323 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3324 if not self._match(TokenType.QUALIFY): 3325 return None 3326 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3327 3328 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3329 if skip_start_token: 3330 start = None 3331 elif self._match(TokenType.START_WITH): 3332 start = self._parse_conjunction() 3333 else: 3334 return None 3335 3336 self._match(TokenType.CONNECT_BY) 3337 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3338 exp.Prior, this=self._parse_bitwise() 3339 ) 3340 connect = self._parse_conjunction() 3341 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3342 3343 if not start and self._match(TokenType.START_WITH): 3344 start = self._parse_conjunction() 3345 3346 return self.expression(exp.Connect, start=start, connect=connect) 3347 3348 def _parse_name_as_expression(self) -> exp.Alias: 3349 return self.expression( 3350 exp.Alias, 3351 alias=self._parse_id_var(any_token=True), 3352 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3353 ) 3354 3355 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3356 if self._match_text_seq("INTERPOLATE"): 3357 return self._parse_wrapped_csv(self._parse_name_as_expression) 3358 return None 3359 3360 def _parse_order( 3361 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3362 ) -> t.Optional[exp.Expression]: 3363 siblings = None 3364 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3365 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3366 return this 3367 3368 siblings = True 3369 3370 return self.expression( 3371 exp.Order, 3372 this=this, 3373 expressions=self._parse_csv(self._parse_ordered), 3374 interpolate=self._parse_interpolate(), 3375 siblings=siblings, 3376 ) 3377 3378 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3379 if not self._match(token): 3380 return None 3381 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3382 3383 def _parse_ordered( 3384 self, parse_method: t.Optional[t.Callable] = None 3385 ) -> t.Optional[exp.Ordered]: 3386 this = parse_method() if parse_method else self._parse_conjunction() 3387 if not this: 3388 return None 3389 3390 asc = self._match(TokenType.ASC) 3391 desc = self._match(TokenType.DESC) or (asc and False) 3392 3393 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3394 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3395 3396 nulls_first = is_nulls_first or False 3397 explicitly_null_ordered = is_nulls_first or is_nulls_last 3398 3399 if ( 3400 not explicitly_null_ordered 3401 and ( 3402 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3403 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3404 ) 3405 and self.dialect.NULL_ORDERING != "nulls_are_last" 3406 ): 3407 nulls_first = True 3408 3409 if self._match_text_seq("WITH", "FILL"): 3410 with_fill = self.expression( 3411 exp.WithFill, 3412 **{ # type: ignore 3413 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3414 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3415 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3416 }, 3417 ) 3418 else: 3419 with_fill = 
None 3420 3421 return self.expression( 3422 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3423 ) 3424 3425 def _parse_limit( 3426 self, this: t.Optional[exp.Expression] = None, top: bool = False 3427 ) -> t.Optional[exp.Expression]: 3428 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3429 comments = self._prev_comments 3430 if top: 3431 limit_paren = self._match(TokenType.L_PAREN) 3432 expression = self._parse_term() if limit_paren else self._parse_number() 3433 3434 if limit_paren: 3435 self._match_r_paren() 3436 else: 3437 expression = self._parse_term() 3438 3439 if self._match(TokenType.COMMA): 3440 offset = expression 3441 expression = self._parse_term() 3442 else: 3443 offset = None 3444 3445 limit_exp = self.expression( 3446 exp.Limit, 3447 this=this, 3448 expression=expression, 3449 offset=offset, 3450 comments=comments, 3451 expressions=self._parse_limit_by(), 3452 ) 3453 3454 return limit_exp 3455 3456 if self._match(TokenType.FETCH): 3457 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3458 direction = self._prev.text.upper() if direction else "FIRST" 3459 3460 count = self._parse_field(tokens=self.FETCH_TOKENS) 3461 percent = self._match(TokenType.PERCENT) 3462 3463 self._match_set((TokenType.ROW, TokenType.ROWS)) 3464 3465 only = self._match_text_seq("ONLY") 3466 with_ties = self._match_text_seq("WITH", "TIES") 3467 3468 if only and with_ties: 3469 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3470 3471 return self.expression( 3472 exp.Fetch, 3473 direction=direction, 3474 count=count, 3475 percent=percent, 3476 with_ties=with_ties, 3477 ) 3478 3479 return this 3480 3481 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3482 if not self._match(TokenType.OFFSET): 3483 return this 3484 3485 count = self._parse_term() 3486 self._match_set((TokenType.ROW, TokenType.ROWS)) 3487 3488 return self.expression( 3489 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3490 ) 3491 3492 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3493 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3494 3495 def _parse_locks(self) -> t.List[exp.Lock]: 3496 locks = [] 3497 while True: 3498 if self._match_text_seq("FOR", "UPDATE"): 3499 update = True 3500 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3501 "LOCK", "IN", "SHARE", "MODE" 3502 ): 3503 update = False 3504 else: 3505 break 3506 3507 expressions = None 3508 if self._match_text_seq("OF"): 3509 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3510 3511 wait: t.Optional[bool | exp.Expression] = None 3512 if self._match_text_seq("NOWAIT"): 3513 wait = True 3514 elif self._match_text_seq("WAIT"): 3515 wait = self._parse_primary() 3516 elif self._match_text_seq("SKIP", "LOCKED"): 3517 wait = False 3518 3519 locks.append( 3520 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3521 ) 3522 3523 return locks 3524 3525 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3526 while this and self._match_set(self.SET_OPERATIONS): 3527 token_type = self._prev.token_type 3528 3529 if token_type == TokenType.UNION: 3530 operation = exp.Union 3531 elif token_type == TokenType.EXCEPT: 3532 operation = exp.Except 3533 else: 3534 operation = exp.Intersect 3535 3536 comments = self._prev.comments 3537 distinct = self._match(TokenType.DISTINCT) or not 
self._match(TokenType.ALL) 3538 by_name = self._match_text_seq("BY", "NAME") 3539 expression = self._parse_select(nested=True, parse_set_operation=False) 3540 3541 this = self.expression( 3542 operation, 3543 comments=comments, 3544 this=this, 3545 distinct=distinct, 3546 by_name=by_name, 3547 expression=expression, 3548 ) 3549 3550 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3551 expression = this.expression 3552 3553 if expression: 3554 for arg in self.UNION_MODIFIERS: 3555 expr = expression.args.get(arg) 3556 if expr: 3557 this.set(arg, expr.pop()) 3558 3559 return this 3560 3561 def _parse_expression(self) -> t.Optional[exp.Expression]: 3562 return self._parse_alias(self._parse_conjunction()) 3563 3564 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3565 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3566 3567 def _parse_equality(self) -> t.Optional[exp.Expression]: 3568 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3569 3570 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3571 return self._parse_tokens(self._parse_range, self.COMPARISON) 3572 3573 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3574 this = this or self._parse_bitwise() 3575 negate = self._match(TokenType.NOT) 3576 3577 if self._match_set(self.RANGE_PARSERS): 3578 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3579 if not expression: 3580 return this 3581 3582 this = expression 3583 elif self._match(TokenType.ISNULL): 3584 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3585 3586 # Postgres supports ISNULL and NOTNULL for conditions. 3587 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3588 if self._match(TokenType.NOTNULL): 3589 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3590 this = self.expression(exp.Not, this=this) 3591 3592 if negate: 3593 this = self.expression(exp.Not, this=this) 3594 3595 if self._match(TokenType.IS): 3596 this = self._parse_is(this) 3597 3598 return this 3599 3600 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3601 index = self._index - 1 3602 negate = self._match(TokenType.NOT) 3603 3604 if self._match_text_seq("DISTINCT", "FROM"): 3605 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3606 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3607 3608 expression = self._parse_null() or self._parse_boolean() 3609 if not expression: 3610 self._retreat(index) 3611 return None 3612 3613 this = self.expression(exp.Is, this=this, expression=expression) 3614 return self.expression(exp.Not, this=this) if negate else this 3615 3616 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3617 unnest = self._parse_unnest(with_alias=False) 3618 if unnest: 3619 this = self.expression(exp.In, this=this, unnest=unnest) 3620 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3621 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3622 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3623 3624 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3625 this = self.expression(exp.In, this=this, query=expressions[0]) 3626 else: 3627 this = self.expression(exp.In, this=this, expressions=expressions) 3628 3629 if matched_l_paren: 3630 self._match_r_paren(this) 3631 elif not self._match(TokenType.R_BRACKET, expression=this): 3632 
self.raise_error("Expecting ]") 3633 else: 3634 this = self.expression(exp.In, this=this, field=self._parse_field()) 3635 3636 return this 3637 3638 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3639 low = self._parse_bitwise() 3640 self._match(TokenType.AND) 3641 high = self._parse_bitwise() 3642 return self.expression(exp.Between, this=this, low=low, high=high) 3643 3644 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3645 if not self._match(TokenType.ESCAPE): 3646 return this 3647 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3648 3649 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3650 index = self._index 3651 3652 if not self._match(TokenType.INTERVAL) and match_interval: 3653 return None 3654 3655 if self._match(TokenType.STRING, advance=False): 3656 this = self._parse_primary() 3657 else: 3658 this = self._parse_term() 3659 3660 if not this or ( 3661 isinstance(this, exp.Column) 3662 and not this.table 3663 and not this.this.quoted 3664 and this.name.upper() == "IS" 3665 ): 3666 self._retreat(index) 3667 return None 3668 3669 unit = self._parse_function() or ( 3670 not self._match(TokenType.ALIAS, advance=False) 3671 and self._parse_var(any_token=True, upper=True) 3672 ) 3673 3674 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3675 # each INTERVAL expression into this canonical form so it's easy to transpile 3676 if this and this.is_number: 3677 this = exp.Literal.string(this.name) 3678 elif this and this.is_string: 3679 parts = this.name.split() 3680 3681 if len(parts) == 2: 3682 if unit: 3683 # This is not actually a unit, it's something else (e.g. a "window side") 3684 unit = None 3685 self._retreat(self._index - 1) 3686 3687 this = exp.Literal.string(parts[0]) 3688 unit = self.expression(exp.Var, this=parts[1].upper()) 3689 3690 return self.expression(exp.Interval, this=this, unit=unit) 3691 3692 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3693 this = self._parse_term() 3694 3695 while True: 3696 if self._match_set(self.BITWISE): 3697 this = self.expression( 3698 self.BITWISE[self._prev.token_type], 3699 this=this, 3700 expression=self._parse_term(), 3701 ) 3702 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3703 this = self.expression( 3704 exp.DPipe, 3705 this=this, 3706 expression=self._parse_term(), 3707 safe=not self.dialect.STRICT_STRING_CONCAT, 3708 ) 3709 elif self._match(TokenType.DQMARK): 3710 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3711 elif self._match_pair(TokenType.LT, TokenType.LT): 3712 this = self.expression( 3713 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3714 ) 3715 elif self._match_pair(TokenType.GT, TokenType.GT): 3716 this = self.expression( 3717 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3718 ) 3719 else: 3720 break 3721 3722 return this 3723 3724 def _parse_term(self) -> t.Optional[exp.Expression]: 3725 return self._parse_tokens(self._parse_factor, self.TERM) 3726 3727 def _parse_factor(self) -> t.Optional[exp.Expression]: 3728 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3729 this = parse_method() 3730 3731 while self._match_set(self.FACTOR): 3732 this = self.expression( 3733 self.FACTOR[self._prev.token_type], 3734 this=this, 3735 comments=self._prev_comments, 3736 expression=parse_method(), 3737 ) 3738 if isinstance(this, exp.Div): 
3739 this.args["typed"] = self.dialect.TYPED_DIVISION 3740 this.args["safe"] = self.dialect.SAFE_DIVISION 3741 3742 return this 3743 3744 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3745 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3746 3747 def _parse_unary(self) -> t.Optional[exp.Expression]: 3748 if self._match_set(self.UNARY_PARSERS): 3749 return self.UNARY_PARSERS[self._prev.token_type](self) 3750 return self._parse_at_time_zone(self._parse_type()) 3751 3752 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3753 interval = parse_interval and self._parse_interval() 3754 if interval: 3755 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 3756 while True: 3757 index = self._index 3758 self._match(TokenType.PLUS) 3759 3760 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3761 self._retreat(index) 3762 break 3763 3764 interval = self.expression( # type: ignore 3765 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3766 ) 3767 3768 return interval 3769 3770 index = self._index 3771 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3772 this = self._parse_column() 3773 3774 if data_type: 3775 if isinstance(this, exp.Literal): 3776 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3777 if parser: 3778 return parser(self, this, data_type) 3779 return self.expression(exp.Cast, this=this, to=data_type) 3780 if not data_type.expressions: 3781 self._retreat(index) 3782 return self._parse_column() 3783 return self._parse_column_ops(data_type) 3784 3785 return this and self._parse_column_ops(this) 3786 3787 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3788 this = self._parse_type() 3789 if not this: 3790 return None 3791 3792 return self.expression( 3793 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3794 ) 3795 3796 def _parse_types( 3797 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3798 ) -> t.Optional[exp.Expression]: 3799 index = self._index 3800 3801 prefix = self._match_text_seq("SYSUDTLIB", ".") 3802 3803 if not self._match_set(self.TYPE_TOKENS): 3804 identifier = allow_identifiers and self._parse_id_var( 3805 any_token=False, tokens=(TokenType.VAR,) 3806 ) 3807 if identifier: 3808 tokens = self.dialect.tokenize(identifier.name) 3809 3810 if len(tokens) != 1: 3811 self.raise_error("Unexpected identifier", self._prev) 3812 3813 if tokens[0].token_type in self.TYPE_TOKENS: 3814 self._prev = tokens[0] 3815 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3816 type_name = identifier.name 3817 3818 while self._match(TokenType.DOT): 3819 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3820 3821 return exp.DataType.build(type_name, udt=True) 3822 else: 3823 self._retreat(self._index - 1) 3824 return None 3825 else: 3826 return None 3827 3828 type_token = self._prev.token_type 3829 3830 if type_token == TokenType.PSEUDO_TYPE: 3831 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3832 3833 if type_token == TokenType.OBJECT_IDENTIFIER: 3834 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3835 3836 nested = type_token in self.NESTED_TYPE_TOKENS 3837 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3838 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3839 expressions = None 3840 maybe_func = False 3841 3842 if self._match(TokenType.L_PAREN): 3843 if is_struct: 3844 
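# Struct fields spelled with parentheses, e.g. STRUCT(a INT, b TEXT); the
# angle-bracket form STRUCT<a INT, b TEXT> is handled further down via LT/GT.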
expressions = self._parse_csv(self._parse_struct_types) 3845 elif nested: 3846 expressions = self._parse_csv( 3847 lambda: self._parse_types( 3848 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3849 ) 3850 ) 3851 elif type_token in self.ENUM_TYPE_TOKENS: 3852 expressions = self._parse_csv(self._parse_equality) 3853 elif is_aggregate: 3854 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 3855 any_token=False, tokens=(TokenType.VAR,) 3856 ) 3857 if not func_or_ident or not self._match(TokenType.COMMA): 3858 return None 3859 expressions = self._parse_csv( 3860 lambda: self._parse_types( 3861 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3862 ) 3863 ) 3864 expressions.insert(0, func_or_ident) 3865 else: 3866 expressions = self._parse_csv(self._parse_type_size) 3867 3868 if not expressions or not self._match(TokenType.R_PAREN): 3869 self._retreat(index) 3870 return None 3871 3872 maybe_func = True 3873 3874 this: t.Optional[exp.Expression] = None 3875 values: t.Optional[t.List[exp.Expression]] = None 3876 3877 if nested and self._match(TokenType.LT): 3878 if is_struct: 3879 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 3880 else: 3881 expressions = self._parse_csv( 3882 lambda: self._parse_types( 3883 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3884 ) 3885 ) 3886 3887 if not self._match(TokenType.GT): 3888 self.raise_error("Expecting >") 3889 3890 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3891 values = self._parse_csv(self._parse_conjunction) 3892 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3893 3894 if type_token in self.TIMESTAMPS: 3895 if self._match_text_seq("WITH", "TIME", "ZONE"): 3896 maybe_func = False 3897 tz_type = ( 3898 exp.DataType.Type.TIMETZ 3899 if type_token in self.TIMES 3900 else exp.DataType.Type.TIMESTAMPTZ 3901 ) 3902 this = exp.DataType(this=tz_type, expressions=expressions) 3903 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3904 maybe_func = False 3905 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3906 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3907 maybe_func = False 3908 elif type_token == TokenType.INTERVAL: 3909 unit = self._parse_var() 3910 3911 if self._match_text_seq("TO"): 3912 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3913 else: 3914 span = None 3915 3916 if span or not unit: 3917 this = self.expression( 3918 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3919 ) 3920 else: 3921 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 3922 3923 if maybe_func and check_func: 3924 index2 = self._index 3925 peek = self._parse_string() 3926 3927 if not peek: 3928 self._retreat(index) 3929 return None 3930 3931 self._retreat(index2) 3932 3933 if not this: 3934 if self._match_text_seq("UNSIGNED"): 3935 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3936 if not unsigned_type_token: 3937 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3938 3939 type_token = unsigned_type_token or type_token 3940 3941 this = exp.DataType( 3942 this=exp.DataType.Type[type_token.value], 3943 expressions=expressions, 3944 nested=nested, 3945 values=values, 3946 prefix=prefix, 3947 ) 3948 3949 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3950 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], 
nested=True) 3951 3952 return this 3953 3954 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 3955 index = self._index 3956 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3957 self._match(TokenType.COLON) 3958 column_def = self._parse_column_def(this) 3959 3960 if type_required and ( 3961 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 3962 ): 3963 self._retreat(index) 3964 return self._parse_types() 3965 3966 return column_def 3967 3968 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3969 if not self._match_text_seq("AT", "TIME", "ZONE"): 3970 return this 3971 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3972 3973 def _parse_column(self) -> t.Optional[exp.Expression]: 3974 this = self._parse_column_reference() 3975 return self._parse_column_ops(this) if this else self._parse_bracket(this) 3976 3977 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 3978 this = self._parse_field() 3979 if ( 3980 not this 3981 and self._match(TokenType.VALUES, advance=False) 3982 and self.VALUES_FOLLOWED_BY_PAREN 3983 and (not self._next or self._next.token_type != TokenType.L_PAREN) 3984 ): 3985 this = self._parse_id_var() 3986 3987 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 3988 3989 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3990 this = self._parse_bracket(this) 3991 3992 while self._match_set(self.COLUMN_OPERATORS): 3993 op_token = self._prev.token_type 3994 op = self.COLUMN_OPERATORS.get(op_token) 3995 3996 if op_token == TokenType.DCOLON: 3997 field = self._parse_types() 3998 if not field: 3999 self.raise_error("Expected type") 4000 elif op and self._curr: 4001 field = self._parse_column_reference() 4002 else: 4003 field = self._parse_field(anonymous_func=True, any_token=True) 4004 4005 if isinstance(field, exp.Func): 4006 # bigquery allows function calls like x.y.count(...) 4007 # SAFE.SUBSTR(...) 
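# e.g. `SELECT a.b.SUM(x)`: the dotted prefix is parsed as a column first and
# rewritten into a Dot chain once the trailing function call is recognized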
4008 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4009 this = self._replace_columns_with_dots(this) 4010 4011 if op: 4012 this = op(self, this, field) 4013 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4014 this = self.expression( 4015 exp.Column, 4016 this=field, 4017 table=this.this, 4018 db=this.args.get("table"), 4019 catalog=this.args.get("db"), 4020 ) 4021 else: 4022 this = self.expression(exp.Dot, this=this, expression=field) 4023 this = self._parse_bracket(this) 4024 return this 4025 4026 def _parse_primary(self) -> t.Optional[exp.Expression]: 4027 if self._match_set(self.PRIMARY_PARSERS): 4028 token_type = self._prev.token_type 4029 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4030 4031 if token_type == TokenType.STRING: 4032 expressions = [primary] 4033 while self._match(TokenType.STRING): 4034 expressions.append(exp.Literal.string(self._prev.text)) 4035 4036 if len(expressions) > 1: 4037 return self.expression(exp.Concat, expressions=expressions) 4038 4039 return primary 4040 4041 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4042 return exp.Literal.number(f"0.{self._prev.text}") 4043 4044 if self._match(TokenType.L_PAREN): 4045 comments = self._prev_comments 4046 query = self._parse_select() 4047 4048 if query: 4049 expressions = [query] 4050 else: 4051 expressions = self._parse_expressions() 4052 4053 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4054 4055 if isinstance(this, exp.UNWRAPPED_QUERIES): 4056 this = self._parse_set_operations( 4057 self._parse_subquery(this=this, parse_alias=False) 4058 ) 4059 elif len(expressions) > 1: 4060 this = self.expression(exp.Tuple, expressions=expressions) 4061 else: 4062 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 4063 4064 if this: 4065 this.add_comments(comments) 4066 4067 self._match_r_paren(expression=this) 4068 return this 4069 4070 return None 4071 4072 def _parse_field( 4073 self, 4074 any_token: bool = False, 4075 tokens: t.Optional[t.Collection[TokenType]] = None, 4076 anonymous_func: bool = False, 4077 ) -> t.Optional[exp.Expression]: 4078 return ( 4079 self._parse_primary() 4080 or self._parse_function(anonymous=anonymous_func) 4081 or self._parse_id_var(any_token=any_token, tokens=tokens) 4082 ) 4083 4084 def _parse_function( 4085 self, 4086 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4087 anonymous: bool = False, 4088 optional_parens: bool = True, 4089 ) -> t.Optional[exp.Expression]: 4090 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4091 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4092 fn_syntax = False 4093 if ( 4094 self._match(TokenType.L_BRACE, advance=False) 4095 and self._next 4096 and self._next.text.upper() == "FN" 4097 ): 4098 self._advance(2) 4099 fn_syntax = True 4100 4101 func = self._parse_function_call( 4102 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4103 ) 4104 4105 if fn_syntax: 4106 self._match(TokenType.R_BRACE) 4107 4108 return func 4109 4110 def _parse_function_call( 4111 self, 4112 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4113 anonymous: bool = False, 4114 optional_parens: bool = True, 4115 ) -> t.Optional[exp.Expression]: 4116 if not self._curr: 4117 return None 4118 4119 comments = self._curr.comments 4120 token_type = self._curr.token_type 4121 this = self._curr.text 4122 upper = this.upper() 4123 4124 parser = 
self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4125 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4126 self._advance() 4127 return parser(self) 4128 4129 if not self._next or self._next.token_type != TokenType.L_PAREN: 4130 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4131 self._advance() 4132 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4133 4134 return None 4135 4136 if token_type not in self.FUNC_TOKENS: 4137 return None 4138 4139 self._advance(2) 4140 4141 parser = self.FUNCTION_PARSERS.get(upper) 4142 if parser and not anonymous: 4143 this = parser(self) 4144 else: 4145 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4146 4147 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4148 this = self.expression(subquery_predicate, this=self._parse_select()) 4149 self._match_r_paren() 4150 return this 4151 4152 if functions is None: 4153 functions = self.FUNCTIONS 4154 4155 function = functions.get(upper) 4156 4157 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4158 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4159 4160 if alias: 4161 args = self._kv_to_prop_eq(args) 4162 4163 if function and not anonymous: 4164 if "dialect" in function.__code__.co_varnames: 4165 func = function(args, dialect=self.dialect) 4166 else: 4167 func = function(args) 4168 4169 func = self.validate_expression(func, args) 4170 if not self.dialect.NORMALIZE_FUNCTIONS: 4171 func.meta["name"] = this 4172 4173 this = func 4174 else: 4175 if token_type == TokenType.IDENTIFIER: 4176 this = exp.Identifier(this=this, quoted=True) 4177 this = self.expression(exp.Anonymous, this=this, expressions=args) 4178 4179 if isinstance(this, exp.Expression): 4180 this.add_comments(comments) 4181 4182 self._match_r_paren(this) 4183 return self._parse_window(this) 4184 4185 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4186 transformed = [] 4187 4188 for e in expressions: 4189 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4190 if isinstance(e, exp.Alias): 4191 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4192 4193 if not isinstance(e, exp.PropertyEQ): 4194 e = self.expression( 4195 exp.PropertyEQ, this=exp.to_identifier(e.name), expression=e.expression 4196 ) 4197 4198 if isinstance(e.this, exp.Column): 4199 e.this.replace(e.this.this) 4200 4201 transformed.append(e) 4202 4203 return transformed 4204 4205 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4206 return self._parse_column_def(self._parse_id_var()) 4207 4208 def _parse_user_defined_function( 4209 self, kind: t.Optional[TokenType] = None 4210 ) -> t.Optional[exp.Expression]: 4211 this = self._parse_id_var() 4212 4213 while self._match(TokenType.DOT): 4214 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4215 4216 if not self._match(TokenType.L_PAREN): 4217 return this 4218 4219 expressions = self._parse_csv(self._parse_function_parameter) 4220 self._match_r_paren() 4221 return self.expression( 4222 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4223 ) 4224 4225 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4226 literal = self._parse_primary() 4227 if literal: 4228 return self.expression(exp.Introducer, this=token.text, expression=literal) 4229 4230 return self.expression(exp.Identifier, this=token.text) 4231 4232 def _parse_session_parameter(self) -> 
exp.SessionParameter: 4233 kind = None 4234 this = self._parse_id_var() or self._parse_primary() 4235 4236 if this and self._match(TokenType.DOT): 4237 kind = this.name 4238 this = self._parse_var() or self._parse_primary() 4239 4240 return self.expression(exp.SessionParameter, this=this, kind=kind) 4241 4242 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4243 index = self._index 4244 4245 if self._match(TokenType.L_PAREN): 4246 expressions = t.cast( 4247 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4248 ) 4249 4250 if not self._match(TokenType.R_PAREN): 4251 self._retreat(index) 4252 else: 4253 expressions = [self._parse_id_var()] 4254 4255 if self._match_set(self.LAMBDAS): 4256 return self.LAMBDAS[self._prev.token_type](self, expressions) 4257 4258 self._retreat(index) 4259 4260 this: t.Optional[exp.Expression] 4261 4262 if self._match(TokenType.DISTINCT): 4263 this = self.expression( 4264 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4265 ) 4266 else: 4267 this = self._parse_select_or_expression(alias=alias) 4268 4269 return self._parse_limit( 4270 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4271 ) 4272 4273 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4274 index = self._index 4275 4276 if not self.errors: 4277 try: 4278 if self._parse_select(nested=True): 4279 return this 4280 except ParseError: 4281 pass 4282 finally: 4283 self.errors.clear() 4284 self._retreat(index) 4285 4286 if not self._match(TokenType.L_PAREN): 4287 return this 4288 4289 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4290 4291 self._match_r_paren() 4292 return self.expression(exp.Schema, this=this, expressions=args) 4293 4294 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4295 return self._parse_column_def(self._parse_field(any_token=True)) 4296 4297 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4298 # column defs are not really columns, they're identifiers 4299 if isinstance(this, exp.Column): 4300 this = this.this 4301 4302 kind = self._parse_types(schema=True) 4303 4304 if self._match_text_seq("FOR", "ORDINALITY"): 4305 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4306 4307 constraints: t.List[exp.Expression] = [] 4308 4309 if not kind and self._match(TokenType.ALIAS): 4310 constraints.append( 4311 self.expression( 4312 exp.ComputedColumnConstraint, 4313 this=self._parse_conjunction(), 4314 persisted=self._match_text_seq("PERSISTED"), 4315 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4316 ) 4317 ) 4318 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4319 self._match(TokenType.ALIAS) 4320 constraints.append( 4321 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4322 ) 4323 4324 while True: 4325 constraint = self._parse_column_constraint() 4326 if not constraint: 4327 break 4328 constraints.append(constraint) 4329 4330 if not kind and not constraints: 4331 return this 4332 4333 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4334 4335 def _parse_auto_increment( 4336 self, 4337 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4338 start = None 4339 increment = None 4340 4341 if self._match(TokenType.L_PAREN, advance=False): 4342 args = self._parse_wrapped_csv(self._parse_bitwise) 4343 start = 
seq_get(args, 0) 4344 increment = seq_get(args, 1) 4345 elif self._match_text_seq("START"): 4346 start = self._parse_bitwise() 4347 self._match_text_seq("INCREMENT") 4348 increment = self._parse_bitwise() 4349 4350 if start and increment: 4351 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4352 4353 return exp.AutoIncrementColumnConstraint() 4354 4355 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4356 if not self._match_text_seq("REFRESH"): 4357 self._retreat(self._index - 1) 4358 return None 4359 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4360 4361 def _parse_compress(self) -> exp.CompressColumnConstraint: 4362 if self._match(TokenType.L_PAREN, advance=False): 4363 return self.expression( 4364 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4365 ) 4366 4367 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4368 4369 def _parse_generated_as_identity( 4370 self, 4371 ) -> ( 4372 exp.GeneratedAsIdentityColumnConstraint 4373 | exp.ComputedColumnConstraint 4374 | exp.GeneratedAsRowColumnConstraint 4375 ): 4376 if self._match_text_seq("BY", "DEFAULT"): 4377 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4378 this = self.expression( 4379 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4380 ) 4381 else: 4382 self._match_text_seq("ALWAYS") 4383 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4384 4385 self._match(TokenType.ALIAS) 4386 4387 if self._match_text_seq("ROW"): 4388 start = self._match_text_seq("START") 4389 if not start: 4390 self._match(TokenType.END) 4391 hidden = self._match_text_seq("HIDDEN") 4392 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4393 4394 identity = self._match_text_seq("IDENTITY") 4395 4396 if self._match(TokenType.L_PAREN): 4397 if self._match(TokenType.START_WITH): 4398 this.set("start", self._parse_bitwise()) 4399 if self._match_text_seq("INCREMENT", "BY"): 4400 this.set("increment", self._parse_bitwise()) 4401 if self._match_text_seq("MINVALUE"): 4402 this.set("minvalue", self._parse_bitwise()) 4403 if self._match_text_seq("MAXVALUE"): 4404 this.set("maxvalue", self._parse_bitwise()) 4405 4406 if self._match_text_seq("CYCLE"): 4407 this.set("cycle", True) 4408 elif self._match_text_seq("NO", "CYCLE"): 4409 this.set("cycle", False) 4410 4411 if not identity: 4412 this.set("expression", self._parse_bitwise()) 4413 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4414 args = self._parse_csv(self._parse_bitwise) 4415 this.set("start", seq_get(args, 0)) 4416 this.set("increment", seq_get(args, 1)) 4417 4418 self._match_r_paren() 4419 4420 return this 4421 4422 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4423 self._match_text_seq("LENGTH") 4424 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4425 4426 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4427 if self._match_text_seq("NULL"): 4428 return self.expression(exp.NotNullColumnConstraint) 4429 if self._match_text_seq("CASESPECIFIC"): 4430 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4431 if self._match_text_seq("FOR", "REPLICATION"): 4432 return self.expression(exp.NotForReplicationColumnConstraint) 4433 return None 4434 4435 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4436 if self._match(TokenType.CONSTRAINT): 4437 
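# Named column constraint, e.g. `price INT CONSTRAINT positive_price CHECK (price > 0)`:
# capture the optional constraint name before dispatching on the constraint kind.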
this = self._parse_id_var() 4438 else: 4439 this = None 4440 4441 if self._match_texts(self.CONSTRAINT_PARSERS): 4442 return self.expression( 4443 exp.ColumnConstraint, 4444 this=this, 4445 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4446 ) 4447 4448 return this 4449 4450 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4451 if not self._match(TokenType.CONSTRAINT): 4452 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4453 4454 return self.expression( 4455 exp.Constraint, 4456 this=self._parse_id_var(), 4457 expressions=self._parse_unnamed_constraints(), 4458 ) 4459 4460 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4461 constraints = [] 4462 while True: 4463 constraint = self._parse_unnamed_constraint() or self._parse_function() 4464 if not constraint: 4465 break 4466 constraints.append(constraint) 4467 4468 return constraints 4469 4470 def _parse_unnamed_constraint( 4471 self, constraints: t.Optional[t.Collection[str]] = None 4472 ) -> t.Optional[exp.Expression]: 4473 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4474 constraints or self.CONSTRAINT_PARSERS 4475 ): 4476 return None 4477 4478 constraint = self._prev.text.upper() 4479 if constraint not in self.CONSTRAINT_PARSERS: 4480 self.raise_error(f"No parser found for schema constraint {constraint}.") 4481 4482 return self.CONSTRAINT_PARSERS[constraint](self) 4483 4484 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4485 self._match_text_seq("KEY") 4486 return self.expression( 4487 exp.UniqueColumnConstraint, 4488 this=self._parse_schema(self._parse_id_var(any_token=False)), 4489 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4490 on_conflict=self._parse_on_conflict(), 4491 ) 4492 4493 def _parse_key_constraint_options(self) -> t.List[str]: 4494 options = [] 4495 while True: 4496 if not self._curr: 4497 break 4498 4499 if self._match(TokenType.ON): 4500 action = None 4501 on = self._advance_any() and self._prev.text 4502 4503 if self._match_text_seq("NO", "ACTION"): 4504 action = "NO ACTION" 4505 elif self._match_text_seq("CASCADE"): 4506 action = "CASCADE" 4507 elif self._match_text_seq("RESTRICT"): 4508 action = "RESTRICT" 4509 elif self._match_pair(TokenType.SET, TokenType.NULL): 4510 action = "SET NULL" 4511 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4512 action = "SET DEFAULT" 4513 else: 4514 self.raise_error("Invalid key constraint") 4515 4516 options.append(f"ON {on} {action}") 4517 elif self._match_text_seq("NOT", "ENFORCED"): 4518 options.append("NOT ENFORCED") 4519 elif self._match_text_seq("DEFERRABLE"): 4520 options.append("DEFERRABLE") 4521 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4522 options.append("INITIALLY DEFERRED") 4523 elif self._match_text_seq("NORELY"): 4524 options.append("NORELY") 4525 elif self._match_text_seq("MATCH", "FULL"): 4526 options.append("MATCH FULL") 4527 else: 4528 break 4529 4530 return options 4531 4532 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4533 if match and not self._match(TokenType.REFERENCES): 4534 return None 4535 4536 expressions = None 4537 this = self._parse_table(schema=True) 4538 options = self._parse_key_constraint_options() 4539 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4540 4541 def _parse_foreign_key(self) -> exp.ForeignKey: 4542 expressions = self._parse_wrapped_id_vars() 4543 reference = self._parse_references() 
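# Referential actions, e.g. ON DELETE CASCADE / ON UPDATE SET NULL, are
# collected below into a dict keyed by the lowercased event ("delete"/"update").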
4544 options = {} 4545 4546 while self._match(TokenType.ON): 4547 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4548 self.raise_error("Expected DELETE or UPDATE") 4549 4550 kind = self._prev.text.lower() 4551 4552 if self._match_text_seq("NO", "ACTION"): 4553 action = "NO ACTION" 4554 elif self._match(TokenType.SET): 4555 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4556 action = "SET " + self._prev.text.upper() 4557 else: 4558 self._advance() 4559 action = self._prev.text.upper() 4560 4561 options[kind] = action 4562 4563 return self.expression( 4564 exp.ForeignKey, 4565 expressions=expressions, 4566 reference=reference, 4567 **options, # type: ignore 4568 ) 4569 4570 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4571 return self._parse_field() 4572 4573 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4574 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4575 self._retreat(self._index - 1) 4576 return None 4577 4578 id_vars = self._parse_wrapped_id_vars() 4579 return self.expression( 4580 exp.PeriodForSystemTimeConstraint, 4581 this=seq_get(id_vars, 0), 4582 expression=seq_get(id_vars, 1), 4583 ) 4584 4585 def _parse_primary_key( 4586 self, wrapped_optional: bool = False, in_props: bool = False 4587 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4588 desc = ( 4589 self._match_set((TokenType.ASC, TokenType.DESC)) 4590 and self._prev.token_type == TokenType.DESC 4591 ) 4592 4593 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4594 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4595 4596 expressions = self._parse_wrapped_csv( 4597 self._parse_primary_key_part, optional=wrapped_optional 4598 ) 4599 options = self._parse_key_constraint_options() 4600 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4601 4602 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4603 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4604 4605 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4606 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4607 return this 4608 4609 bracket_kind = self._prev.token_type 4610 expressions = self._parse_csv( 4611 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4612 ) 4613 4614 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4615 self.raise_error("Expected ]") 4616 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4617 self.raise_error("Expected }") 4618 4619 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4620 if bracket_kind == TokenType.L_BRACE: 4621 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4622 elif not this or this.name.upper() == "ARRAY": 4623 this = self.expression(exp.Array, expressions=expressions) 4624 else: 4625 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4626 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4627 4628 self._add_comments(this) 4629 return self._parse_bracket(this) 4630 4631 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4632 if self._match(TokenType.COLON): 4633 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4634 return this 4635 4636 def _parse_case(self) -> 
t.Optional[exp.Expression]: 4637 ifs = [] 4638 default = None 4639 4640 comments = self._prev_comments 4641 expression = self._parse_conjunction() 4642 4643 while self._match(TokenType.WHEN): 4644 this = self._parse_conjunction() 4645 self._match(TokenType.THEN) 4646 then = self._parse_conjunction() 4647 ifs.append(self.expression(exp.If, this=this, true=then)) 4648 4649 if self._match(TokenType.ELSE): 4650 default = self._parse_conjunction() 4651 4652 if not self._match(TokenType.END): 4653 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4654 default = exp.column("interval") 4655 else: 4656 self.raise_error("Expected END after CASE", self._prev) 4657 4658 return self._parse_window( 4659 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4660 ) 4661 4662 def _parse_if(self) -> t.Optional[exp.Expression]: 4663 if self._match(TokenType.L_PAREN): 4664 args = self._parse_csv(self._parse_conjunction) 4665 this = self.validate_expression(exp.If.from_arg_list(args), args) 4666 self._match_r_paren() 4667 else: 4668 index = self._index - 1 4669 4670 if self.NO_PAREN_IF_COMMANDS and index == 0: 4671 return self._parse_as_command(self._prev) 4672 4673 condition = self._parse_conjunction() 4674 4675 if not condition: 4676 self._retreat(index) 4677 return None 4678 4679 self._match(TokenType.THEN) 4680 true = self._parse_conjunction() 4681 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4682 self._match(TokenType.END) 4683 this = self.expression(exp.If, this=condition, true=true, false=false) 4684 4685 return self._parse_window(this) 4686 4687 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4688 if not self._match_text_seq("VALUE", "FOR"): 4689 self._retreat(self._index - 1) 4690 return None 4691 4692 return self.expression( 4693 exp.NextValueFor, 4694 this=self._parse_column(), 4695 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4696 ) 4697 4698 def _parse_extract(self) -> exp.Extract: 4699 this = self._parse_function() or self._parse_var() or self._parse_type() 4700 4701 if self._match(TokenType.FROM): 4702 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4703 4704 if not self._match(TokenType.COMMA): 4705 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4706 4707 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4708 4709 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4710 this = self._parse_conjunction() 4711 4712 if not self._match(TokenType.ALIAS): 4713 if self._match(TokenType.COMMA): 4714 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4715 4716 self.raise_error("Expected AS after CAST") 4717 4718 fmt = None 4719 to = self._parse_types() 4720 4721 if self._match(TokenType.FORMAT): 4722 fmt_string = self._parse_string() 4723 fmt = self._parse_at_time_zone(fmt_string) 4724 4725 if not to: 4726 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4727 if to.this in exp.DataType.TEMPORAL_TYPES: 4728 this = self.expression( 4729 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4730 this=this, 4731 format=exp.Literal.string( 4732 format_time( 4733 fmt_string.this if fmt_string else "", 4734 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4735 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4736 ) 4737 ), 4738 ) 4739 4740 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, 
exp.StrToTime): 4741 this.set("zone", fmt.args["zone"]) 4742 return this 4743 elif not to: 4744 self.raise_error("Expected TYPE after CAST") 4745 elif isinstance(to, exp.Identifier): 4746 to = exp.DataType.build(to.name, udt=True) 4747 elif to.this == exp.DataType.Type.CHAR: 4748 if self._match(TokenType.CHARACTER_SET): 4749 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4750 4751 return self.expression( 4752 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4753 ) 4754 4755 def _parse_string_agg(self) -> exp.Expression: 4756 if self._match(TokenType.DISTINCT): 4757 args: t.List[t.Optional[exp.Expression]] = [ 4758 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4759 ] 4760 if self._match(TokenType.COMMA): 4761 args.extend(self._parse_csv(self._parse_conjunction)) 4762 else: 4763 args = self._parse_csv(self._parse_conjunction) # type: ignore 4764 4765 index = self._index 4766 if not self._match(TokenType.R_PAREN) and args: 4767 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4768 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4769 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4770 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4771 4772 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4773 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4774 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4775 if not self._match_text_seq("WITHIN", "GROUP"): 4776 self._retreat(index) 4777 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4778 4779 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4780 order = self._parse_order(this=seq_get(args, 0)) 4781 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4782 4783 def _parse_convert( 4784 self, strict: bool, safe: t.Optional[bool] = None 4785 ) -> t.Optional[exp.Expression]: 4786 this = self._parse_bitwise() 4787 4788 if self._match(TokenType.USING): 4789 to: t.Optional[exp.Expression] = self.expression( 4790 exp.CharacterSet, this=self._parse_var() 4791 ) 4792 elif self._match(TokenType.COMMA): 4793 to = self._parse_types() 4794 else: 4795 to = None 4796 4797 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4798 4799 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4800 """ 4801 There are generally two variants of the DECODE function: 4802 4803 - DECODE(bin, charset) 4804 - DECODE(expression, search, result [, search, result] ... [, default]) 4805 4806 The second variant will always be parsed into a CASE expression. Note that NULL 4807 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4808 instead of relying on pattern matching. 
4809 """ 4810 args = self._parse_csv(self._parse_conjunction) 4811 4812 if len(args) < 3: 4813 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4814 4815 expression, *expressions = args 4816 if not expression: 4817 return None 4818 4819 ifs = [] 4820 for search, result in zip(expressions[::2], expressions[1::2]): 4821 if not search or not result: 4822 return None 4823 4824 if isinstance(search, exp.Literal): 4825 ifs.append( 4826 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4827 ) 4828 elif isinstance(search, exp.Null): 4829 ifs.append( 4830 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4831 ) 4832 else: 4833 cond = exp.or_( 4834 exp.EQ(this=expression.copy(), expression=search), 4835 exp.and_( 4836 exp.Is(this=expression.copy(), expression=exp.Null()), 4837 exp.Is(this=search.copy(), expression=exp.Null()), 4838 copy=False, 4839 ), 4840 copy=False, 4841 ) 4842 ifs.append(exp.If(this=cond, true=result)) 4843 4844 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4845 4846 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4847 self._match_text_seq("KEY") 4848 key = self._parse_column() 4849 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4850 self._match_text_seq("VALUE") 4851 value = self._parse_bitwise() 4852 4853 if not key and not value: 4854 return None 4855 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4856 4857 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4858 if not this or not self._match_text_seq("FORMAT", "JSON"): 4859 return this 4860 4861 return self.expression(exp.FormatJson, this=this) 4862 4863 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4864 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4865 for value in values: 4866 if self._match_text_seq(value, "ON", on): 4867 return f"{value} ON {on}" 4868 4869 return None 4870 4871 @t.overload 4872 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 4873 4874 @t.overload 4875 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
4876 4877 def _parse_json_object(self, agg=False): 4878 star = self._parse_star() 4879 expressions = ( 4880 [star] 4881 if star 4882 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4883 ) 4884 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4885 4886 unique_keys = None 4887 if self._match_text_seq("WITH", "UNIQUE"): 4888 unique_keys = True 4889 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4890 unique_keys = False 4891 4892 self._match_text_seq("KEYS") 4893 4894 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4895 self._parse_type() 4896 ) 4897 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4898 4899 return self.expression( 4900 exp.JSONObjectAgg if agg else exp.JSONObject, 4901 expressions=expressions, 4902 null_handling=null_handling, 4903 unique_keys=unique_keys, 4904 return_type=return_type, 4905 encoding=encoding, 4906 ) 4907 4908 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4909 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4910 if not self._match_text_seq("NESTED"): 4911 this = self._parse_id_var() 4912 kind = self._parse_types(allow_identifiers=False) 4913 nested = None 4914 else: 4915 this = None 4916 kind = None 4917 nested = True 4918 4919 path = self._match_text_seq("PATH") and self._parse_string() 4920 nested_schema = nested and self._parse_json_schema() 4921 4922 return self.expression( 4923 exp.JSONColumnDef, 4924 this=this, 4925 kind=kind, 4926 path=path, 4927 nested_schema=nested_schema, 4928 ) 4929 4930 def _parse_json_schema(self) -> exp.JSONSchema: 4931 self._match_text_seq("COLUMNS") 4932 return self.expression( 4933 exp.JSONSchema, 4934 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4935 ) 4936 4937 def _parse_json_table(self) -> exp.JSONTable: 4938 this = self._parse_format_json(self._parse_bitwise()) 4939 path = self._match(TokenType.COMMA) and self._parse_string() 4940 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4941 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4942 schema = self._parse_json_schema() 4943 4944 return exp.JSONTable( 4945 this=this, 4946 schema=schema, 4947 path=path, 4948 error_handling=error_handling, 4949 empty_handling=empty_handling, 4950 ) 4951 4952 def _parse_match_against(self) -> exp.MatchAgainst: 4953 expressions = self._parse_csv(self._parse_column) 4954 4955 self._match_text_seq(")", "AGAINST", "(") 4956 4957 this = self._parse_string() 4958 4959 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4960 modifier = "IN NATURAL LANGUAGE MODE" 4961 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4962 modifier = f"{modifier} WITH QUERY EXPANSION" 4963 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4964 modifier = "IN BOOLEAN MODE" 4965 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4966 modifier = "WITH QUERY EXPANSION" 4967 else: 4968 modifier = None 4969 4970 return self.expression( 4971 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4972 ) 4973 4974 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4975 def _parse_open_json(self) -> exp.OpenJSON: 4976 this = self._parse_bitwise() 4977 path = self._match(TokenType.COMMA) and self._parse_string() 4978 4979 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4980 this = self._parse_field(any_token=True) 4981 kind = self._parse_types() 4982 path = 
self._parse_string() 4983 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4984 4985 return self.expression( 4986 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4987 ) 4988 4989 expressions = None 4990 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4991 self._match_l_paren() 4992 expressions = self._parse_csv(_parse_open_json_column_def) 4993 4994 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4995 4996 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4997 args = self._parse_csv(self._parse_bitwise) 4998 4999 if self._match(TokenType.IN): 5000 return self.expression( 5001 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5002 ) 5003 5004 if haystack_first: 5005 haystack = seq_get(args, 0) 5006 needle = seq_get(args, 1) 5007 else: 5008 needle = seq_get(args, 0) 5009 haystack = seq_get(args, 1) 5010 5011 return self.expression( 5012 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5013 ) 5014 5015 def _parse_predict(self) -> exp.Predict: 5016 self._match_text_seq("MODEL") 5017 this = self._parse_table() 5018 5019 self._match(TokenType.COMMA) 5020 self._match_text_seq("TABLE") 5021 5022 return self.expression( 5023 exp.Predict, 5024 this=this, 5025 expression=self._parse_table(), 5026 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5027 ) 5028 5029 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5030 args = self._parse_csv(self._parse_table) 5031 return exp.JoinHint(this=func_name.upper(), expressions=args) 5032 5033 def _parse_substring(self) -> exp.Substring: 5034 # Postgres supports the form: substring(string [from int] [for int]) 5035 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5036 5037 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5038 5039 if self._match(TokenType.FROM): 5040 args.append(self._parse_bitwise()) 5041 if self._match(TokenType.FOR): 5042 args.append(self._parse_bitwise()) 5043 5044 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5045 5046 def _parse_trim(self) -> exp.Trim: 5047 # https://www.w3resource.com/sql/character-functions/trim.php 5048 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5049 5050 position = None 5051 collation = None 5052 expression = None 5053 5054 if self._match_texts(self.TRIM_TYPES): 5055 position = self._prev.text.upper() 5056 5057 this = self._parse_bitwise() 5058 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5059 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5060 expression = self._parse_bitwise() 5061 5062 if invert_order: 5063 this, expression = expression, this 5064 5065 if self._match(TokenType.COLLATE): 5066 collation = self._parse_bitwise() 5067 5068 return self.expression( 5069 exp.Trim, this=this, position=position, expression=expression, collation=collation 5070 ) 5071 5072 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5073 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5074 5075 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5076 return self._parse_window(self._parse_id_var(), alias=True) 5077 5078 def _parse_respect_or_ignore_nulls( 5079 self, this: t.Optional[exp.Expression] 5080 ) -> t.Optional[exp.Expression]: 5081 if self._match_text_seq("IGNORE", "NULLS"): 5082 return self.expression(exp.IgnoreNulls, this=this) 5083 if 
self._match_text_seq("RESPECT", "NULLS"): 5084 return self.expression(exp.RespectNulls, this=this) 5085 return this 5086 5087 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5088 if self._match(TokenType.HAVING): 5089 self._match_texts(("MAX", "MIN")) 5090 max = self._prev.text.upper() != "MIN" 5091 return self.expression( 5092 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5093 ) 5094 5095 return this 5096 5097 def _parse_window( 5098 self, this: t.Optional[exp.Expression], alias: bool = False 5099 ) -> t.Optional[exp.Expression]: 5100 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5101 self._match(TokenType.WHERE) 5102 this = self.expression( 5103 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5104 ) 5105 self._match_r_paren() 5106 5107 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5108 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5109 if self._match_text_seq("WITHIN", "GROUP"): 5110 order = self._parse_wrapped(self._parse_order) 5111 this = self.expression(exp.WithinGroup, this=this, expression=order) 5112 5113 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5114 # Some dialects choose to implement and some do not. 5115 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5116 5117 # There is some code above in _parse_lambda that handles 5118 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5119 5120 # The below changes handle 5121 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5122 5123 # Oracle allows both formats 5124 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5125 # and Snowflake chose to do the same for familiarity 5126 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5127 if isinstance(this, exp.AggFunc): 5128 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5129 5130 if ignore_respect and ignore_respect is not this: 5131 ignore_respect.replace(ignore_respect.this) 5132 this = self.expression(ignore_respect.__class__, this=this) 5133 5134 this = self._parse_respect_or_ignore_nulls(this) 5135 5136 # bigquery select from window x AS (partition by ...) 
5137 if alias: 5138 over = None 5139 self._match(TokenType.ALIAS) 5140 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5141 return this 5142 else: 5143 over = self._prev.text.upper() 5144 5145 if not self._match(TokenType.L_PAREN): 5146 return self.expression( 5147 exp.Window, this=this, alias=self._parse_id_var(False), over=over 5148 ) 5149 5150 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5151 5152 first = self._match(TokenType.FIRST) 5153 if self._match_text_seq("LAST"): 5154 first = False 5155 5156 partition, order = self._parse_partition_and_order() 5157 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5158 5159 if kind: 5160 self._match(TokenType.BETWEEN) 5161 start = self._parse_window_spec() 5162 self._match(TokenType.AND) 5163 end = self._parse_window_spec() 5164 5165 spec = self.expression( 5166 exp.WindowSpec, 5167 kind=kind, 5168 start=start["value"], 5169 start_side=start["side"], 5170 end=end["value"], 5171 end_side=end["side"], 5172 ) 5173 else: 5174 spec = None 5175 5176 self._match_r_paren() 5177 5178 window = self.expression( 5179 exp.Window, 5180 this=this, 5181 partition_by=partition, 5182 order=order, 5183 spec=spec, 5184 alias=window_alias, 5185 over=over, 5186 first=first, 5187 ) 5188 5189 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5190 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5191 return self._parse_window(window, alias=alias) 5192 5193 return window 5194 5195 def _parse_partition_and_order( 5196 self, 5197 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5198 return self._parse_partition_by(), self._parse_order() 5199 5200 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5201 self._match(TokenType.BETWEEN) 5202 5203 return { 5204 "value": ( 5205 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5206 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5207 or self._parse_bitwise() 5208 ), 5209 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5210 } 5211 5212 def _parse_alias( 5213 self, this: t.Optional[exp.Expression], explicit: bool = False 5214 ) -> t.Optional[exp.Expression]: 5215 any_token = self._match(TokenType.ALIAS) 5216 comments = self._prev_comments 5217 5218 if explicit and not any_token: 5219 return this 5220 5221 if self._match(TokenType.L_PAREN): 5222 aliases = self.expression( 5223 exp.Aliases, 5224 comments=comments, 5225 this=this, 5226 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5227 ) 5228 self._match_r_paren(aliases) 5229 return aliases 5230 5231 alias = self._parse_id_var(any_token) or ( 5232 self.STRING_ALIASES and self._parse_string_as_identifier() 5233 ) 5234 5235 if alias: 5236 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5237 column = this.this 5238 5239 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5240 if not this.comments and column and column.comments: 5241 this.comments = column.comments 5242 column.comments = None 5243 5244 return this 5245 5246 def _parse_id_var( 5247 self, 5248 any_token: bool = True, 5249 tokens: t.Optional[t.Collection[TokenType]] = None, 5250 ) -> t.Optional[exp.Expression]: 5251 identifier = self._parse_identifier() 5252 5253 if identifier: 5254 return identifier 5255 5256 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5257 quoted = self._prev.token_type == TokenType.STRING 5258 
return exp.Identifier(this=self._prev.text, quoted=quoted) 5259 5260 return None 5261 5262 def _parse_string(self) -> t.Optional[exp.Expression]: 5263 if self._match_set(self.STRING_PARSERS): 5264 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5265 return self._parse_placeholder() 5266 5267 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5268 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5269 5270 def _parse_number(self) -> t.Optional[exp.Expression]: 5271 if self._match_set(self.NUMERIC_PARSERS): 5272 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5273 return self._parse_placeholder() 5274 5275 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5276 if self._match(TokenType.IDENTIFIER): 5277 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5278 return self._parse_placeholder() 5279 5280 def _parse_var( 5281 self, 5282 any_token: bool = False, 5283 tokens: t.Optional[t.Collection[TokenType]] = None, 5284 upper: bool = False, 5285 ) -> t.Optional[exp.Expression]: 5286 if ( 5287 (any_token and self._advance_any()) 5288 or self._match(TokenType.VAR) 5289 or (self._match_set(tokens) if tokens else False) 5290 ): 5291 return self.expression( 5292 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5293 ) 5294 return self._parse_placeholder() 5295 5296 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5297 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5298 self._advance() 5299 return self._prev 5300 return None 5301 5302 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5303 return self._parse_var() or self._parse_string() 5304 5305 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5306 return self._parse_primary() or self._parse_var(any_token=True) 5307 5308 def _parse_null(self) -> t.Optional[exp.Expression]: 5309 if self._match_set(self.NULL_TOKENS): 5310 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5311 return self._parse_placeholder() 5312 5313 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5314 if self._match(TokenType.TRUE): 5315 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5316 if self._match(TokenType.FALSE): 5317 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5318 return self._parse_placeholder() 5319 5320 def _parse_star(self) -> t.Optional[exp.Expression]: 5321 if self._match(TokenType.STAR): 5322 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5323 return self._parse_placeholder() 5324 5325 def _parse_parameter(self) -> exp.Parameter: 5326 self._match(TokenType.L_BRACE) 5327 this = self._parse_identifier() or self._parse_primary_or_var() 5328 expression = self._match(TokenType.COLON) and ( 5329 self._parse_identifier() or self._parse_primary_or_var() 5330 ) 5331 self._match(TokenType.R_BRACE) 5332 return self.expression(exp.Parameter, this=this, expression=expression) 5333 5334 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5335 if self._match_set(self.PLACEHOLDER_PARSERS): 5336 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5337 if placeholder: 5338 return placeholder 5339 self._advance(-1) 5340 return None 5341 5342 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5343 if not self._match(TokenType.EXCEPT): 5344 return None 5345 if self._match(TokenType.L_PAREN, advance=False): 5346 return 
self._parse_wrapped_csv(self._parse_column) 5347 5348 except_column = self._parse_column() 5349 return [except_column] if except_column else None 5350 5351 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5352 if not self._match(TokenType.REPLACE): 5353 return None 5354 if self._match(TokenType.L_PAREN, advance=False): 5355 return self._parse_wrapped_csv(self._parse_expression) 5356 5357 replace_expression = self._parse_expression() 5358 return [replace_expression] if replace_expression else None 5359 5360 def _parse_csv( 5361 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5362 ) -> t.List[exp.Expression]: 5363 parse_result = parse_method() 5364 items = [parse_result] if parse_result is not None else [] 5365 5366 while self._match(sep): 5367 self._add_comments(parse_result) 5368 parse_result = parse_method() 5369 if parse_result is not None: 5370 items.append(parse_result) 5371 5372 return items 5373 5374 def _parse_tokens( 5375 self, parse_method: t.Callable, expressions: t.Dict 5376 ) -> t.Optional[exp.Expression]: 5377 this = parse_method() 5378 5379 while self._match_set(expressions): 5380 this = self.expression( 5381 expressions[self._prev.token_type], 5382 this=this, 5383 comments=self._prev_comments, 5384 expression=parse_method(), 5385 ) 5386 5387 return this 5388 5389 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5390 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5391 5392 def _parse_wrapped_csv( 5393 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5394 ) -> t.List[exp.Expression]: 5395 return self._parse_wrapped( 5396 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5397 ) 5398 5399 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5400 wrapped = self._match(TokenType.L_PAREN) 5401 if not wrapped and not optional: 5402 self.raise_error("Expecting (") 5403 parse_result = parse_method() 5404 if wrapped: 5405 self._match_r_paren() 5406 return parse_result 5407 5408 def _parse_expressions(self) -> t.List[exp.Expression]: 5409 return self._parse_csv(self._parse_expression) 5410 5411 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5412 return self._parse_select() or self._parse_set_operations( 5413 self._parse_expression() if alias else self._parse_conjunction() 5414 ) 5415 5416 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5417 return self._parse_query_modifiers( 5418 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5419 ) 5420 5421 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5422 this = None 5423 if self._match_texts(self.TRANSACTION_KIND): 5424 this = self._prev.text 5425 5426 self._match_texts(("TRANSACTION", "WORK")) 5427 5428 modes = [] 5429 while True: 5430 mode = [] 5431 while self._match(TokenType.VAR): 5432 mode.append(self._prev.text) 5433 5434 if mode: 5435 modes.append(" ".join(mode)) 5436 if not self._match(TokenType.COMMA): 5437 break 5438 5439 return self.expression(exp.Transaction, this=this, modes=modes) 5440 5441 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5442 chain = None 5443 savepoint = None 5444 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5445 5446 self._match_texts(("TRANSACTION", "WORK")) 5447 5448 if self._match_text_seq("TO"): 5449 self._match_text_seq("SAVEPOINT") 5450 savepoint = self._parse_id_var() 5451 5452 if 
self._match(TokenType.AND): 5453 chain = not self._match_text_seq("NO") 5454 self._match_text_seq("CHAIN") 5455 5456 if is_rollback: 5457 return self.expression(exp.Rollback, savepoint=savepoint) 5458 5459 return self.expression(exp.Commit, chain=chain) 5460 5461 def _parse_refresh(self) -> exp.Refresh: 5462 self._match(TokenType.TABLE) 5463 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5464 5465 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5466 if not self._match_text_seq("ADD"): 5467 return None 5468 5469 self._match(TokenType.COLUMN) 5470 exists_column = self._parse_exists(not_=True) 5471 expression = self._parse_field_def() 5472 5473 if expression: 5474 expression.set("exists", exists_column) 5475 5476 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5477 if self._match_texts(("FIRST", "AFTER")): 5478 position = self._prev.text 5479 column_position = self.expression( 5480 exp.ColumnPosition, this=self._parse_column(), position=position 5481 ) 5482 expression.set("position", column_position) 5483 5484 return expression 5485 5486 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5487 drop = self._match(TokenType.DROP) and self._parse_drop() 5488 if drop and not isinstance(drop, exp.Command): 5489 drop.set("kind", drop.args.get("kind", "COLUMN")) 5490 return drop 5491 5492 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5493 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5494 return self.expression( 5495 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5496 ) 5497 5498 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5499 index = self._index - 1 5500 5501 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5502 return self._parse_csv( 5503 lambda: self.expression( 5504 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5505 ) 5506 ) 5507 5508 self._retreat(index) 5509 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5510 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5511 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5512 5513 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5514 self._match(TokenType.COLUMN) 5515 column = self._parse_field(any_token=True) 5516 5517 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5518 return self.expression(exp.AlterColumn, this=column, drop=True) 5519 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5520 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5521 if self._match(TokenType.COMMENT): 5522 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5523 5524 self._match_text_seq("SET", "DATA") 5525 return self.expression( 5526 exp.AlterColumn, 5527 this=column, 5528 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5529 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5530 using=self._match(TokenType.USING) and self._parse_conjunction(), 5531 ) 5532 5533 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5534 index = self._index - 1 5535 5536 partition_exists = self._parse_exists() 5537 if self._match(TokenType.PARTITION, advance=False): 5538 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5539 5540 self._retreat(index) 5541 return 
self._parse_csv(self._parse_drop_column) 5542 5543 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5544 if self._match(TokenType.COLUMN): 5545 exists = self._parse_exists() 5546 old_column = self._parse_column() 5547 to = self._match_text_seq("TO") 5548 new_column = self._parse_column() 5549 5550 if old_column is None or to is None or new_column is None: 5551 return None 5552 5553 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5554 5555 self._match_text_seq("TO") 5556 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5557 5558 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5559 start = self._prev 5560 5561 if not self._match(TokenType.TABLE): 5562 return self._parse_as_command(start) 5563 5564 exists = self._parse_exists() 5565 only = self._match_text_seq("ONLY") 5566 this = self._parse_table(schema=True) 5567 5568 if self._next: 5569 self._advance() 5570 5571 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5572 if parser: 5573 actions = ensure_list(parser(self)) 5574 options = self._parse_csv(self._parse_property) 5575 5576 if not self._curr and actions: 5577 return self.expression( 5578 exp.AlterTable, 5579 this=this, 5580 exists=exists, 5581 actions=actions, 5582 only=only, 5583 options=options, 5584 ) 5585 5586 return self._parse_as_command(start) 5587 5588 def _parse_merge(self) -> exp.Merge: 5589 self._match(TokenType.INTO) 5590 target = self._parse_table() 5591 5592 if target and self._match(TokenType.ALIAS, advance=False): 5593 target.set("alias", self._parse_table_alias()) 5594 5595 self._match(TokenType.USING) 5596 using = self._parse_table() 5597 5598 self._match(TokenType.ON) 5599 on = self._parse_conjunction() 5600 5601 return self.expression( 5602 exp.Merge, 5603 this=target, 5604 using=using, 5605 on=on, 5606 expressions=self._parse_when_matched(), 5607 ) 5608 5609 def _parse_when_matched(self) -> t.List[exp.When]: 5610 whens = [] 5611 5612 while self._match(TokenType.WHEN): 5613 matched = not self._match(TokenType.NOT) 5614 self._match_text_seq("MATCHED") 5615 source = ( 5616 False 5617 if self._match_text_seq("BY", "TARGET") 5618 else self._match_text_seq("BY", "SOURCE") 5619 ) 5620 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5621 5622 self._match(TokenType.THEN) 5623 5624 if self._match(TokenType.INSERT): 5625 _this = self._parse_star() 5626 if _this: 5627 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5628 else: 5629 then = self.expression( 5630 exp.Insert, 5631 this=self._parse_value(), 5632 expression=self._match_text_seq("VALUES") and self._parse_value(), 5633 ) 5634 elif self._match(TokenType.UPDATE): 5635 expressions = self._parse_star() 5636 if expressions: 5637 then = self.expression(exp.Update, expressions=expressions) 5638 else: 5639 then = self.expression( 5640 exp.Update, 5641 expressions=self._match(TokenType.SET) 5642 and self._parse_csv(self._parse_equality), 5643 ) 5644 elif self._match(TokenType.DELETE): 5645 then = self.expression(exp.Var, this=self._prev.text) 5646 else: 5647 then = None 5648 5649 whens.append( 5650 self.expression( 5651 exp.When, 5652 matched=matched, 5653 source=source, 5654 condition=condition, 5655 then=then, 5656 ) 5657 ) 5658 return whens 5659 5660 def _parse_show(self) -> t.Optional[exp.Expression]: 5661 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5662 if parser: 5663 return parser(self) 5664 return 
self._parse_as_command(self._prev) 5665 5666 def _parse_set_item_assignment( 5667 self, kind: t.Optional[str] = None 5668 ) -> t.Optional[exp.Expression]: 5669 index = self._index 5670 5671 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5672 return self._parse_set_transaction(global_=kind == "GLOBAL") 5673 5674 left = self._parse_primary() or self._parse_id_var() 5675 assignment_delimiter = self._match_texts(("=", "TO")) 5676 5677 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5678 self._retreat(index) 5679 return None 5680 5681 right = self._parse_statement() or self._parse_id_var() 5682 this = self.expression(exp.EQ, this=left, expression=right) 5683 5684 return self.expression(exp.SetItem, this=this, kind=kind) 5685 5686 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5687 self._match_text_seq("TRANSACTION") 5688 characteristics = self._parse_csv( 5689 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5690 ) 5691 return self.expression( 5692 exp.SetItem, 5693 expressions=characteristics, 5694 kind="TRANSACTION", 5695 **{"global": global_}, # type: ignore 5696 ) 5697 5698 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5699 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5700 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5701 5702 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5703 index = self._index 5704 set_ = self.expression( 5705 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5706 ) 5707 5708 if self._curr: 5709 self._retreat(index) 5710 return self._parse_as_command(self._prev) 5711 5712 return set_ 5713 5714 def _parse_var_from_options( 5715 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5716 ) -> t.Optional[exp.Var]: 5717 start = self._curr 5718 if not start: 5719 return None 5720 5721 option = start.text.upper() 5722 continuations = options.get(option) 5723 5724 index = self._index 5725 self._advance() 5726 for keywords in continuations or []: 5727 if isinstance(keywords, str): 5728 keywords = (keywords,) 5729 5730 if self._match_text_seq(*keywords): 5731 option = f"{option} {' '.join(keywords)}" 5732 break 5733 else: 5734 if continuations or continuations is None: 5735 if raise_unmatched: 5736 self.raise_error(f"Unknown option {option}") 5737 5738 self._retreat(index) 5739 return None 5740 5741 return exp.var(option) 5742 5743 def _parse_as_command(self, start: Token) -> exp.Command: 5744 while self._curr: 5745 self._advance() 5746 text = self._find_sql(start, self._prev) 5747 size = len(start.text) 5748 self._warn_unsupported() 5749 return exp.Command(this=text[:size], expression=text[size:]) 5750 5751 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5752 settings = [] 5753 5754 self._match_l_paren() 5755 kind = self._parse_id_var() 5756 5757 if self._match(TokenType.L_PAREN): 5758 while True: 5759 key = self._parse_id_var() 5760 value = self._parse_primary() 5761 5762 if not key and value is None: 5763 break 5764 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5765 self._match(TokenType.R_PAREN) 5766 5767 self._match_r_paren() 5768 5769 return self.expression( 5770 exp.DictProperty, 5771 this=this, 5772 kind=kind.this if kind else None, 5773 settings=settings, 5774 ) 5775 5776 def _parse_dict_range(self, this: str) -> exp.DictRange: 5777 self._match_l_paren() 5778 has_min = 
self._match_text_seq("MIN") 5779 if has_min: 5780 min = self._parse_var() or self._parse_primary() 5781 self._match_text_seq("MAX") 5782 max = self._parse_var() or self._parse_primary() 5783 else: 5784 max = self._parse_var() or self._parse_primary() 5785 min = exp.Literal.number(0) 5786 self._match_r_paren() 5787 return self.expression(exp.DictRange, this=this, min=min, max=max) 5788 5789 def _parse_comprehension( 5790 self, this: t.Optional[exp.Expression] 5791 ) -> t.Optional[exp.Comprehension]: 5792 index = self._index 5793 expression = self._parse_column() 5794 if not self._match(TokenType.IN): 5795 self._retreat(index - 1) 5796 return None 5797 iterator = self._parse_column() 5798 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5799 return self.expression( 5800 exp.Comprehension, 5801 this=this, 5802 expression=expression, 5803 iterator=iterator, 5804 condition=condition, 5805 ) 5806 5807 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5808 if self._match(TokenType.HEREDOC_STRING): 5809 return self.expression(exp.Heredoc, this=self._prev.text) 5810 5811 if not self._match_text_seq("$"): 5812 return None 5813 5814 tags = ["$"] 5815 tag_text = None 5816 5817 if self._is_connected(): 5818 self._advance() 5819 tags.append(self._prev.text.upper()) 5820 else: 5821 self.raise_error("No closing $ found") 5822 5823 if tags[-1] != "$": 5824 if self._is_connected() and self._match_text_seq("$"): 5825 tag_text = tags[-1] 5826 tags.append("$") 5827 else: 5828 self.raise_error("No closing $ found") 5829 5830 heredoc_start = self._curr 5831 5832 while self._curr: 5833 if self._match_text_seq(*tags, advance=False): 5834 this = self._find_sql(heredoc_start, self._prev) 5835 self._advance(len(tags)) 5836 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5837 5838 self._advance() 5839 5840 self.raise_error(f"No closing {''.join(tags)} found") 5841 return None 5842 5843 def _find_parser( 5844 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5845 ) -> t.Optional[t.Callable]: 5846 if not self._curr: 5847 return None 5848 5849 index = self._index 5850 this = [] 5851 while True: 5852 # The current token might be multiple words 5853 curr = self._curr.text.upper() 5854 key = curr.split(" ") 5855 this.append(curr) 5856 5857 self._advance() 5858 result, trie = in_trie(trie, key) 5859 if result == TrieResult.FAILED: 5860 break 5861 5862 if result == TrieResult.EXISTS: 5863 subparser = parsers[" ".join(this)] 5864 return subparser 5865 5866 self._retreat(index) 5867 return None 5868 5869 def _match(self, token_type, advance=True, expression=None): 5870 if not self._curr: 5871 return None 5872 5873 if self._curr.token_type == token_type: 5874 if advance: 5875 self._advance() 5876 self._add_comments(expression) 5877 return True 5878 5879 return None 5880 5881 def _match_set(self, types, advance=True): 5882 if not self._curr: 5883 return None 5884 5885 if self._curr.token_type in types: 5886 if advance: 5887 self._advance() 5888 return True 5889 5890 return None 5891 5892 def _match_pair(self, token_type_a, token_type_b, advance=True): 5893 if not self._curr or not self._next: 5894 return None 5895 5896 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5897 if advance: 5898 self._advance(2) 5899 return True 5900 5901 return None 5902 5903 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5904 if not self._match(TokenType.L_PAREN, expression=expression): 5905 self.raise_error("Expecting (") 5906 5907 def 
_match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5908 if not self._match(TokenType.R_PAREN, expression=expression): 5909 self.raise_error("Expecting )") 5910 5911 def _match_texts(self, texts, advance=True): 5912 if self._curr and self._curr.text.upper() in texts: 5913 if advance: 5914 self._advance() 5915 return True 5916 return None 5917 5918 def _match_text_seq(self, *texts, advance=True): 5919 index = self._index 5920 for text in texts: 5921 if self._curr and self._curr.text.upper() == text: 5922 self._advance() 5923 else: 5924 self._retreat(index) 5925 return None 5926 5927 if not advance: 5928 self._retreat(index) 5929 5930 return True 5931 5932 @t.overload 5933 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: ... 5934 5935 @t.overload 5936 def _replace_columns_with_dots( 5937 self, this: t.Optional[exp.Expression] 5938 ) -> t.Optional[exp.Expression]: ... 5939 5940 def _replace_columns_with_dots(self, this): 5941 if isinstance(this, exp.Dot): 5942 exp.replace_children(this, self._replace_columns_with_dots) 5943 elif isinstance(this, exp.Column): 5944 exp.replace_children(this, self._replace_columns_with_dots) 5945 table = this.args.get("table") 5946 this = ( 5947 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5948 ) 5949 5950 return this 5951 5952 def _replace_lambda( 5953 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5954 ) -> t.Optional[exp.Expression]: 5955 if not node: 5956 return node 5957 5958 for column in node.find_all(exp.Column): 5959 if column.parts[0].name in lambda_variables: 5960 dot_or_id = column.to_dot() if column.table else column.this 5961 parent = column.parent 5962 5963 while isinstance(parent, exp.Dot): 5964 if not isinstance(parent.parent, exp.Dot): 5965 parent.replace(dot_or_id) 5966 break 5967 parent = parent.parent 5968 else: 5969 if column is node: 5970 node = dot_or_id 5971 else: 5972 column.replace(dot_or_id) 5973 return node 5974 5975 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 5976 start = self._prev 5977 5978 # Not to be confused with TRUNCATE(number, decimals) function call 5979 if self._match(TokenType.L_PAREN): 5980 self._retreat(self._index - 2) 5981 return self._parse_function() 5982 5983 # Clickhouse supports TRUNCATE DATABASE as well 5984 is_database = self._match(TokenType.DATABASE) 5985 5986 self._match(TokenType.TABLE) 5987 5988 exists = self._parse_exists(not_=False) 5989 5990 expressions = self._parse_csv( 5991 lambda: self._parse_table(schema=True, is_db_reference=is_database) 5992 ) 5993 5994 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 5995 5996 if self._match_text_seq("RESTART", "IDENTITY"): 5997 identity = "RESTART" 5998 elif self._match_text_seq("CONTINUE", "IDENTITY"): 5999 identity = "CONTINUE" 6000 else: 6001 identity = None 6002 6003 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6004 option = self._prev.text 6005 else: 6006 option = None 6007 6008 partition = self._parse_partition() 6009 6010 # Fallback case 6011 if self._curr: 6012 return self._parse_as_command(start) 6013 6014 return self.expression( 6015 exp.TruncateTable, 6016 expressions=expressions, 6017 is_database=is_database, 6018 exists=exists, 6019 cluster=cluster, 6020 identity=identity, 6021 option=option, 6022 partition=partition, 6023 )
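Taken together, these _parse_* helpers are what sqlglot's public parsing entry points drive. As a quick sketch of how a few of the constructs above surface in parsed trees (the SQL strings and variable names are illustrative; parse_one, exp, and Expression.find are sqlglot's public API):

    import sqlglot
    from sqlglot import exp

    # CASE ... WHEN is handled by _parse_case and produces an exp.Case node
    tree = sqlglot.parse_one("SELECT CASE WHEN a > 0 THEN 'pos' ELSE 'neg' END FROM t")
    assert tree.find(exp.Case)

    # TRIM([LEADING | TRAILING | BOTH] ... FROM ...) is handled by _parse_trim
    assert sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM col)").find(exp.Trim)

    # OVER (PARTITION BY ... ORDER BY ...) is handled by _parse_window
    assert sqlglot.parse_one("SELECT SUM(x) OVER (PARTITION BY y ORDER BY z) FROM t").find(exp.Window)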
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
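For instance, a parser that accumulates several errors before raising, bound to a specific dialect (any value accepted by Dialect.get_or_raise works, including a dialect name string; this is only a usage sketch):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Collect up to 5 errors and raise them together, instead of failing on the first one
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")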
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
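A minimal sketch of driving parse() directly with the tokenizer (the SQL is illustrative; passing the original string as sql improves the error messages):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; SELECT b FROM u"
    tokens = Tokenizer().tokenize(sql)
    trees = Parser().parse(tokens, sql)
    assert len(trees) == 2  # one syntax tree per SQL statement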
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
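A sketch of parsing a fragment into a specific node type; exp.Condition is used here as an expression type that current sqlglot registers in EXPRESSION_PARSERS by default:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "x > 1 AND y IS NOT NULL"
    condition = Parser().parse_into(exp.Condition, Tokenizer().tokenize(sql), sql)[0]
    assert isinstance(condition, exp.Condition)

The higher-level sqlglot.parse_one(sql, into=exp.Condition) wraps this same mechanism.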
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
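Under ErrorLevel.WARN, errors found during a parse are therefore logged rather than raised. A sketch, assuming the usual behavior that check_errors runs at the end of each parse (the malformed SQL is illustrative):

    import logging
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    logging.basicConfig(level=logging.ERROR)
    sql = "SELECT foo( FROM bar"  # unbalanced parenthesis
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql)  # logs "Expecting )" instead of raising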
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f" {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
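With the default ErrorLevel.IMMEDIATE, the raised error carries the structured context described above; for example:

    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT foo( FROM bar"
    try:
        Parser().parse(Tokenizer().tokenize(sql), sql)
    except ParseError as e:
        info = e.errors[0]  # keys include description, line, col, highlight, ...
        print(info["line"], info["col"], info["highlight"])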
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
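A sketch of using expression() outside of a parse run; exp.Cast is chosen here only because it has a mandatory `to` argument, which makes the validation step visible:

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    parser = Parser()
    col = parser.expression(exp.Column, this=exp.to_identifier("x"))  # valid, passes validation

    try:
        parser.expression(exp.Cast, this=col)  # mandatory 'to' argument is missing
    except ParseError:
        pass  # IMMEDIATE (the default) raises as soon as validation fails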
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
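Conversely, validation is a no-op under ErrorLevel.IGNORE; a sketch:

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    lenient = Parser(error_level=ErrorLevel.IGNORE)
    node = lenient.validate_expression(exp.Cast(this=exp.column("x")))
    assert node.args.get("to") is None  # the missing mandatory argument goes unreported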