sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
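
    # Usage sketch (illustrative, not part of the original module): a Parser is
    # usually obtained from a Dialect, which also produces the token stream, e.g.
    #
    #   from sqlglot.dialects import Dialect
    #   dialect = Dialect.get_or_raise("duckdb")
    #   parser = dialect.parser()
    #   trees = parser.parse(dialect.tokenize("SELECT 1"), "SELECT 1")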

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
        TokenType.STORAGE_INTEGRATION,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
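
    # Illustrative note (not part of the original module): membership in
    # ID_VAR_TOKENS is what lets soft keywords double as names, so a query such
    # as `SELECT 1 AS offset` can parse with "offset" as a plain identifier.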

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }
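
    # Illustrative note (not part of the original module): the ARROW entry is what
    # turns a higher-order argument such as `x -> x + 1` into an exp.Lambda,
    # with _replace_lambda rebinding the bare name x to the lambda parameter
    # instead of leaving it as a column reference.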

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
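
    # Illustrative note (not part of the original module): these operators apply
    # postfix to an already-parsed column, e.g. `a::INT` yields exp.Cast (or
    # exp.TryCast when STRICT_CAST is False) and `doc -> '$.k'` yields
    # exp.JSONExtract, with the path normalized via the dialect's to_json_path.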

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
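
    # Illustrative note (not part of the original module): RANGE_PARSERS dispatch
    # on the token that follows a parsed operand, so `x BETWEEN 1 AND 2`,
    # `x IN (1, 2)` and `x IS NULL` are each routed to their dedicated handler.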

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
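
    # Illustrative note (not part of the original module): keys in PROPERTY_PARSERS
    # are matched against upper-cased keyword sequences while parsing DDL, so for
    # `CREATE TABLE t (x INT) ENGINE=InnoDB` the "ENGINE" entry produces an
    # exp.EngineProperty through _parse_property_assignment.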

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
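
    # Illustrative note (not part of the original module): FUNCTION_PARSERS cover
    # call syntax that is not a plain comma-separated argument list, e.g.
    # `CAST(x AS INT)`, `EXTRACT(YEAR FROM d)` and `TRIM(BOTH ' ' FROM s)` all
    # need a dedicated sub-parser rather than the generic argument loop.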

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False

    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
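
    # Illustrative sketch (not part of the original module): parse() yields one
    # tree per semicolon-separated statement, e.g.
    #
    #   parser = Parser(dialect="mysql")
    #   trees = parser.parse(parser.dialect.tokenize("SELECT 1; SELECT 2"))
    #   assert len(trees) == 2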

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
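
    # Illustrative note (not part of the original module): with the default
    # ErrorLevel.IMMEDIATE, raise_error throws on the first problem; under
    # ErrorLevel.RAISE errors accumulate and check_errors raises them together,
    # capped by max_errors, while ErrorLevel.WARN only logs them.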
1213 """ 1214 instance = exp_class(**kwargs) 1215 instance.add_comments(comments) if comments else self._add_comments(instance) 1216 return self.validate_expression(instance) 1217 1218 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1219 if expression and self._prev_comments: 1220 expression.add_comments(self._prev_comments) 1221 self._prev_comments = None 1222 1223 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1224 """ 1225 Validates an Expression, making sure that all its mandatory arguments are set. 1226 1227 Args: 1228 expression: The expression to validate. 1229 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1230 1231 Returns: 1232 The validated expression. 1233 """ 1234 if self.error_level != ErrorLevel.IGNORE: 1235 for error_message in expression.error_messages(args): 1236 self.raise_error(error_message) 1237 1238 return expression 1239 1240 def _find_sql(self, start: Token, end: Token) -> str: 1241 return self.sql[start.start : end.end + 1] 1242 1243 def _is_connected(self) -> bool: 1244 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1245 1246 def _advance(self, times: int = 1) -> None: 1247 self._index += times 1248 self._curr = seq_get(self._tokens, self._index) 1249 self._next = seq_get(self._tokens, self._index + 1) 1250 1251 if self._index > 0: 1252 self._prev = self._tokens[self._index - 1] 1253 self._prev_comments = self._prev.comments 1254 else: 1255 self._prev = None 1256 self._prev_comments = None 1257 1258 def _retreat(self, index: int) -> None: 1259 if index != self._index: 1260 self._advance(index - self._index) 1261 1262 def _warn_unsupported(self) -> None: 1263 if len(self._tokens) <= 1: 1264 return 1265 1266 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1267 # interested in emitting a warning for the one being currently processed. 1268 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1269 1270 logger.warning( 1271 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 
1272 ) 1273 1274 def _parse_command(self) -> exp.Command: 1275 self._warn_unsupported() 1276 return self.expression( 1277 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1278 ) 1279 1280 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1281 start = self._prev 1282 exists = self._parse_exists() if allow_exists else None 1283 1284 self._match(TokenType.ON) 1285 1286 kind = self._match_set(self.CREATABLES) and self._prev 1287 if not kind: 1288 return self._parse_as_command(start) 1289 1290 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1291 this = self._parse_user_defined_function(kind=kind.token_type) 1292 elif kind.token_type == TokenType.TABLE: 1293 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1294 elif kind.token_type == TokenType.COLUMN: 1295 this = self._parse_column() 1296 else: 1297 this = self._parse_id_var() 1298 1299 self._match(TokenType.IS) 1300 1301 return self.expression( 1302 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1303 ) 1304 1305 def _parse_to_table( 1306 self, 1307 ) -> exp.ToTableProperty: 1308 table = self._parse_table_parts(schema=True) 1309 return self.expression(exp.ToTableProperty, this=table) 1310 1311 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1312 def _parse_ttl(self) -> exp.Expression: 1313 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1314 this = self._parse_bitwise() 1315 1316 if self._match_text_seq("DELETE"): 1317 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1318 if self._match_text_seq("RECOMPRESS"): 1319 return self.expression( 1320 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1321 ) 1322 if self._match_text_seq("TO", "DISK"): 1323 return self.expression( 1324 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1325 ) 1326 if self._match_text_seq("TO", "VOLUME"): 1327 return self.expression( 1328 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1329 ) 1330 1331 return this 1332 1333 expressions = self._parse_csv(_parse_ttl_action) 1334 where = self._parse_where() 1335 group = self._parse_group() 1336 1337 aggregates = None 1338 if group and self._match(TokenType.SET): 1339 aggregates = self._parse_csv(self._parse_set_item) 1340 1341 return self.expression( 1342 exp.MergeTreeTTL, 1343 expressions=expressions, 1344 where=where, 1345 group=group, 1346 aggregates=aggregates, 1347 ) 1348 1349 def _parse_statement(self) -> t.Optional[exp.Expression]: 1350 if self._curr is None: 1351 return None 1352 1353 if self._match_set(self.STATEMENT_PARSERS): 1354 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1355 1356 if self._match_set(Tokenizer.COMMANDS): 1357 return self._parse_command() 1358 1359 expression = self._parse_expression() 1360 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1361 return self._parse_query_modifiers(expression) 1362 1363 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1364 start = self._prev 1365 temporary = self._match(TokenType.TEMPORARY) 1366 materialized = self._match_text_seq("MATERIALIZED") 1367 1368 kind = self._match_set(self.CREATABLES) and self._prev.text 1369 if not kind: 1370 return self._parse_as_command(start) 1371 1372 return self.expression( 1373 exp.Drop, 1374 comments=start.comments, 1375 exists=exists or self._parse_exists(), 1376 this=self._parse_table( 1377 

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            ),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )
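
    # Usage sketch for _parse_partition_bound_spec above (a hedged example against
    # Postgres declarative partitioning syntax; node layout may vary by version):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ddl = "CREATE TABLE p PARTITION OF t FOR VALUES FROM (MINVALUE) TO (10)"
    #   spec = sqlglot.parse_one(ddl, read="postgres").find(exp.PartitionBoundSpec)
    #   # spec.args["from_expressions"] -> [Var(this=MINVALUE)]
    #   # spec.args["to_expressions"]   -> [Literal(this=10, is_string=False)]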

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        extended = self._match_text_seq("EXTENDED")
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )
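
    # Usage sketch for _parse_insert and the ON CONFLICT handling below (hedged;
    # assumes the public `sqlglot.parse_one` entry point and Postgres syntax):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ast = sqlglot.parse_one(
    #       "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING", read="postgres"
    #   )
    #   isinstance(ast, exp.Insert)               # True
    #   ast.args["conflict"].args.get("nothing")  # expected: True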

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )
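
    # Usage sketch for _parse_delete's multiple-table handling above (hedged;
    # MySQL syntax):
    #
    #   import sqlglot
    #
    #   ast = sqlglot.parse_one(
    #       "DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id WHERE t2.x > 0", read="mysql"
    #   )
    #   # ast.args["tables"] holds the pre-FROM table list ([t1]), while
    #   # ast.this is the FROM target with its JOIN attached.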

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this
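
    # Usage sketch for the FROM-first branch of _parse_select above (hedged;
    # DuckDB allows a leading FROM with no SELECT list):
    #
    #   import sqlglot
    #
    #   sqlglot.parse_one("FROM tbl", read="duckdb").sql()
    #   # expected: 'SELECT * FROM tbl'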

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )
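
    # Usage sketch for _parse_with/_parse_cte above (hedged):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ast = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
    #   with_ = ast.args["with"]  # exp.With
    #   with_.expressions[0]      # exp.CTE whose alias is `c`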

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )
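
    # Usage sketch for _parse_lateral's APPLY branch above (hedged; T-SQL syntax):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ast = sqlglot.parse_one("SELECT * FROM t CROSS APPLY f(t.x)", read="tsql")
    #   lateral = ast.find(exp.Lateral)
    #   # lateral.args.get("cross_apply") is True for CROSS APPLY,
    #   # False for OUTER APPLY, and None for plain LATERAL.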

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            include=include,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
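
    # Usage sketch for _parse_table_parts above (hedged): dotted names fill the
    # db and catalog slots of the exp.Table node.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   table = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
    #   table.catalog, table.db, table.name  # expected: ('c', 'd', 't')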

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
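
    # Usage sketch for _parse_unnest above (hedged; default dialect):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ast = sqlglot.parse_one("SELECT * FROM UNNEST(arr) AS u(x)")
    #   unnest = ast.find(exp.Unnest)
    #   # unnest.args["alias"] is a TableAlias with this=u and columns=[x];
    #   # dialects with UNNEST_COLUMN_ONLY (e.g. BigQuery) treat the alias
    #   # as a column name instead.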

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)
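
    # Usage sketch for _parse_pivot_in above and _parse_pivot below (hedged;
    # Snowflake-style PIVOT syntax):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   sql = "SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a', 'b')) AS p"
    #   pivot = sqlglot.parse_one(sql, read="snowflake").find(exp.Pivot)
    #   # pivot.args["field"] is the exp.In built by _parse_pivot_in, and
    #   # pivot.args["expressions"] holds the aggregation list ([SUM(v)]).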

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()
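
    # Usage sketch for _parse_group above (hedged): ROLLUP/CUBE/GROUPING SETS
    # land in dedicated keys of the exp.Group node.
    #
    #   import sqlglot
    #
    #   ast = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)")
    #   group = ast.args["group"]
    #   # group.args["rollup"] holds the parenthesized column list; plain
    #   # GROUP BY expressions would land in group.expressions instead.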

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
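
    # Usage sketch for the comma branch of _parse_limit above (hedged): MySQL's
    # "LIMIT offset, count" is split into Limit/Offset by _parse_query_modifiers.
    #
    #   import sqlglot
    #
    #   sqlglot.transpile("SELECT x FROM t LIMIT 5, 10", read="mysql", write="postgres")[0]
    #   # expected: 'SELECT x FROM t LIMIT 10 OFFSET 5'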
self._match(TokenType.ALL) 3524 by_name = self._match_text_seq("BY", "NAME") 3525 expression = self._parse_select(nested=True, parse_set_operation=False) 3526 3527 this = self.expression( 3528 operation, 3529 comments=comments, 3530 this=this, 3531 distinct=distinct, 3532 by_name=by_name, 3533 expression=expression, 3534 ) 3535 3536 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3537 expression = this.expression 3538 3539 if expression: 3540 for arg in self.UNION_MODIFIERS: 3541 expr = expression.args.get(arg) 3542 if expr: 3543 this.set(arg, expr.pop()) 3544 3545 return this 3546 3547 def _parse_expression(self) -> t.Optional[exp.Expression]: 3548 return self._parse_alias(self._parse_conjunction()) 3549 3550 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3551 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3552 3553 def _parse_equality(self) -> t.Optional[exp.Expression]: 3554 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3555 3556 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3557 return self._parse_tokens(self._parse_range, self.COMPARISON) 3558 3559 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3560 this = this or self._parse_bitwise() 3561 negate = self._match(TokenType.NOT) 3562 3563 if self._match_set(self.RANGE_PARSERS): 3564 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3565 if not expression: 3566 return this 3567 3568 this = expression 3569 elif self._match(TokenType.ISNULL): 3570 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3571 3572 # Postgres supports ISNULL and NOTNULL for conditions. 3573 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3574 if self._match(TokenType.NOTNULL): 3575 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3576 this = self.expression(exp.Not, this=this) 3577 3578 if negate: 3579 this = self.expression(exp.Not, this=this) 3580 3581 if self._match(TokenType.IS): 3582 this = self._parse_is(this) 3583 3584 return this 3585 3586 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3587 index = self._index - 1 3588 negate = self._match(TokenType.NOT) 3589 3590 if self._match_text_seq("DISTINCT", "FROM"): 3591 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3592 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3593 3594 expression = self._parse_null() or self._parse_boolean() 3595 if not expression: 3596 self._retreat(index) 3597 return None 3598 3599 this = self.expression(exp.Is, this=this, expression=expression) 3600 return self.expression(exp.Not, this=this) if negate else this 3601 3602 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3603 unnest = self._parse_unnest(with_alias=False) 3604 if unnest: 3605 this = self.expression(exp.In, this=this, unnest=unnest) 3606 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3607 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3608 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3609 3610 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3611 this = self.expression(exp.In, this=this, query=expressions[0]) 3612 else: 3613 this = self.expression(exp.In, this=this, expressions=expressions) 3614 3615 if matched_l_paren: 3616 self._match_r_paren(this) 3617 elif not self._match(TokenType.R_BRACKET, expression=this): 3618 
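# A bracketed IN list (e.g. x IN [1, 2, 3]) must be closed with "]"; only a
# parenthesized list goes through _match_r_paren above.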
self.raise_error("Expecting ]") 3619 else: 3620 this = self.expression(exp.In, this=this, field=self._parse_field()) 3621 3622 return this 3623 3624 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3625 low = self._parse_bitwise() 3626 self._match(TokenType.AND) 3627 high = self._parse_bitwise() 3628 return self.expression(exp.Between, this=this, low=low, high=high) 3629 3630 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3631 if not self._match(TokenType.ESCAPE): 3632 return this 3633 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3634 3635 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3636 index = self._index 3637 3638 if not self._match(TokenType.INTERVAL) and match_interval: 3639 return None 3640 3641 if self._match(TokenType.STRING, advance=False): 3642 this = self._parse_primary() 3643 else: 3644 this = self._parse_term() 3645 3646 if not this or ( 3647 isinstance(this, exp.Column) 3648 and not this.table 3649 and not this.this.quoted 3650 and this.name.upper() == "IS" 3651 ): 3652 self._retreat(index) 3653 return None 3654 3655 unit = self._parse_function() or ( 3656 not self._match(TokenType.ALIAS, advance=False) 3657 and self._parse_var(any_token=True, upper=True) 3658 ) 3659 3660 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3661 # each INTERVAL expression into this canonical form so it's easy to transpile 3662 if this and this.is_number: 3663 this = exp.Literal.string(this.name) 3664 elif this and this.is_string: 3665 parts = this.name.split() 3666 3667 if len(parts) == 2: 3668 if unit: 3669 # This is not actually a unit, it's something else (e.g. a "window side") 3670 unit = None 3671 self._retreat(self._index - 1) 3672 3673 this = exp.Literal.string(parts[0]) 3674 unit = self.expression(exp.Var, this=parts[1].upper()) 3675 3676 return self.expression(exp.Interval, this=this, unit=unit) 3677 3678 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3679 this = self._parse_term() 3680 3681 while True: 3682 if self._match_set(self.BITWISE): 3683 this = self.expression( 3684 self.BITWISE[self._prev.token_type], 3685 this=this, 3686 expression=self._parse_term(), 3687 ) 3688 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3689 this = self.expression( 3690 exp.DPipe, 3691 this=this, 3692 expression=self._parse_term(), 3693 safe=not self.dialect.STRICT_STRING_CONCAT, 3694 ) 3695 elif self._match(TokenType.DQMARK): 3696 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3697 elif self._match_pair(TokenType.LT, TokenType.LT): 3698 this = self.expression( 3699 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3700 ) 3701 elif self._match_pair(TokenType.GT, TokenType.GT): 3702 this = self.expression( 3703 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3704 ) 3705 else: 3706 break 3707 3708 return this 3709 3710 def _parse_term(self) -> t.Optional[exp.Expression]: 3711 return self._parse_tokens(self._parse_factor, self.TERM) 3712 3713 def _parse_factor(self) -> t.Optional[exp.Expression]: 3714 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3715 this = parse_method() 3716 3717 while self._match_set(self.FACTOR): 3718 this = self.expression( 3719 self.FACTOR[self._prev.token_type], 3720 this=this, 3721 comments=self._prev_comments, 3722 expression=parse_method(), 3723 ) 3724 if isinstance(this, exp.Div): 
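# Record the dialect's division semantics on the node so the generator can
# preserve them: "typed" means the result type depends on the operand types
# (e.g. int / int stays int), "safe" means division by zero yields NULL
# rather than an error.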
3725 this.args["typed"] = self.dialect.TYPED_DIVISION 3726 this.args["safe"] = self.dialect.SAFE_DIVISION 3727 3728 return this 3729 3730 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3731 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3732 3733 def _parse_unary(self) -> t.Optional[exp.Expression]: 3734 if self._match_set(self.UNARY_PARSERS): 3735 return self.UNARY_PARSERS[self._prev.token_type](self) 3736 return self._parse_at_time_zone(self._parse_type()) 3737 3738 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3739 interval = parse_interval and self._parse_interval() 3740 if interval: 3741 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 3742 while True: 3743 index = self._index 3744 self._match(TokenType.PLUS) 3745 3746 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3747 self._retreat(index) 3748 break 3749 3750 interval = self.expression( # type: ignore 3751 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3752 ) 3753 3754 return interval 3755 3756 index = self._index 3757 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3758 this = self._parse_column() 3759 3760 if data_type: 3761 if isinstance(this, exp.Literal): 3762 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3763 if parser: 3764 return parser(self, this, data_type) 3765 return self.expression(exp.Cast, this=this, to=data_type) 3766 if not data_type.expressions: 3767 self._retreat(index) 3768 return self._parse_column() 3769 return self._parse_column_ops(data_type) 3770 3771 return this and self._parse_column_ops(this) 3772 3773 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3774 this = self._parse_type() 3775 if not this: 3776 return None 3777 3778 return self.expression( 3779 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3780 ) 3781 3782 def _parse_types( 3783 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3784 ) -> t.Optional[exp.Expression]: 3785 index = self._index 3786 3787 prefix = self._match_text_seq("SYSUDTLIB", ".") 3788 3789 if not self._match_set(self.TYPE_TOKENS): 3790 identifier = allow_identifiers and self._parse_id_var( 3791 any_token=False, tokens=(TokenType.VAR,) 3792 ) 3793 if identifier: 3794 tokens = self.dialect.tokenize(identifier.name) 3795 3796 if len(tokens) != 1: 3797 self.raise_error("Unexpected identifier", self._prev) 3798 3799 if tokens[0].token_type in self.TYPE_TOKENS: 3800 self._prev = tokens[0] 3801 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3802 type_name = identifier.name 3803 3804 while self._match(TokenType.DOT): 3805 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3806 3807 return exp.DataType.build(type_name, udt=True) 3808 else: 3809 self._retreat(self._index - 1) 3810 return None 3811 else: 3812 return None 3813 3814 type_token = self._prev.token_type 3815 3816 if type_token == TokenType.PSEUDO_TYPE: 3817 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3818 3819 if type_token == TokenType.OBJECT_IDENTIFIER: 3820 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3821 3822 nested = type_token in self.NESTED_TYPE_TOKENS 3823 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3824 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3825 expressions = None 3826 maybe_func = False 3827 3828 if self._match(TokenType.L_PAREN): 3829 if is_struct: 3830 
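# STRUCT(a INT, b TEXT): each parenthesized item is a column-like field
# definition (identifier plus type), not a bare type.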
expressions = self._parse_csv(self._parse_struct_types) 3831 elif nested: 3832 expressions = self._parse_csv( 3833 lambda: self._parse_types( 3834 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3835 ) 3836 ) 3837 elif type_token in self.ENUM_TYPE_TOKENS: 3838 expressions = self._parse_csv(self._parse_equality) 3839 elif is_aggregate: 3840 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 3841 any_token=False, tokens=(TokenType.VAR,) 3842 ) 3843 if not func_or_ident or not self._match(TokenType.COMMA): 3844 return None 3845 expressions = self._parse_csv( 3846 lambda: self._parse_types( 3847 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3848 ) 3849 ) 3850 expressions.insert(0, func_or_ident) 3851 else: 3852 expressions = self._parse_csv(self._parse_type_size) 3853 3854 if not expressions or not self._match(TokenType.R_PAREN): 3855 self._retreat(index) 3856 return None 3857 3858 maybe_func = True 3859 3860 this: t.Optional[exp.Expression] = None 3861 values: t.Optional[t.List[exp.Expression]] = None 3862 3863 if nested and self._match(TokenType.LT): 3864 if is_struct: 3865 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 3866 else: 3867 expressions = self._parse_csv( 3868 lambda: self._parse_types( 3869 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3870 ) 3871 ) 3872 3873 if not self._match(TokenType.GT): 3874 self.raise_error("Expecting >") 3875 3876 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3877 values = self._parse_csv(self._parse_conjunction) 3878 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3879 3880 if type_token in self.TIMESTAMPS: 3881 if self._match_text_seq("WITH", "TIME", "ZONE"): 3882 maybe_func = False 3883 tz_type = ( 3884 exp.DataType.Type.TIMETZ 3885 if type_token in self.TIMES 3886 else exp.DataType.Type.TIMESTAMPTZ 3887 ) 3888 this = exp.DataType(this=tz_type, expressions=expressions) 3889 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3890 maybe_func = False 3891 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3892 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3893 maybe_func = False 3894 elif type_token == TokenType.INTERVAL: 3895 unit = self._parse_var() 3896 3897 if self._match_text_seq("TO"): 3898 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3899 else: 3900 span = None 3901 3902 if span or not unit: 3903 this = self.expression( 3904 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3905 ) 3906 else: 3907 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 3908 3909 if maybe_func and check_func: 3910 index2 = self._index 3911 peek = self._parse_string() 3912 3913 if not peek: 3914 self._retreat(index) 3915 return None 3916 3917 self._retreat(index2) 3918 3919 if not this: 3920 if self._match_text_seq("UNSIGNED"): 3921 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3922 if not unsigned_type_token: 3923 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3924 3925 type_token = unsigned_type_token or type_token 3926 3927 this = exp.DataType( 3928 this=exp.DataType.Type[type_token.value], 3929 expressions=expressions, 3930 nested=nested, 3931 values=values, 3932 prefix=prefix, 3933 ) 3934 3935 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3936 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], 
nested=True) 3937 3938 return this 3939 3940 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 3941 index = self._index 3942 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3943 self._match(TokenType.COLON) 3944 column_def = self._parse_column_def(this) 3945 3946 if type_required and ( 3947 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 3948 ): 3949 self._retreat(index) 3950 return self._parse_types() 3951 3952 return column_def 3953 3954 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3955 if not self._match_text_seq("AT", "TIME", "ZONE"): 3956 return this 3957 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3958 3959 def _parse_column(self) -> t.Optional[exp.Expression]: 3960 this = self._parse_column_reference() 3961 return self._parse_column_ops(this) if this else self._parse_bracket(this) 3962 3963 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 3964 this = self._parse_field() 3965 if ( 3966 not this 3967 and self._match(TokenType.VALUES, advance=False) 3968 and self.VALUES_FOLLOWED_BY_PAREN 3969 and (not self._next or self._next.token_type != TokenType.L_PAREN) 3970 ): 3971 this = self._parse_id_var() 3972 3973 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 3974 3975 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3976 this = self._parse_bracket(this) 3977 3978 while self._match_set(self.COLUMN_OPERATORS): 3979 op_token = self._prev.token_type 3980 op = self.COLUMN_OPERATORS.get(op_token) 3981 3982 if op_token == TokenType.DCOLON: 3983 field = self._parse_types() 3984 if not field: 3985 self.raise_error("Expected type") 3986 elif op and self._curr: 3987 field = self._parse_column_reference() 3988 else: 3989 field = self._parse_field(anonymous_func=True, any_token=True) 3990 3991 if isinstance(field, exp.Func): 3992 # bigquery allows function calls like x.y.count(...) 3993 # SAFE.SUBSTR(...) 
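# in which case the column expression parsed so far becomes the dotted
# prefix of the function call (see _replace_columns_with_dots below)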
3994 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3995 this = self._replace_columns_with_dots(this) 3996 3997 if op: 3998 this = op(self, this, field) 3999 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4000 this = self.expression( 4001 exp.Column, 4002 this=field, 4003 table=this.this, 4004 db=this.args.get("table"), 4005 catalog=this.args.get("db"), 4006 ) 4007 else: 4008 this = self.expression(exp.Dot, this=this, expression=field) 4009 this = self._parse_bracket(this) 4010 return this 4011 4012 def _parse_primary(self) -> t.Optional[exp.Expression]: 4013 if self._match_set(self.PRIMARY_PARSERS): 4014 token_type = self._prev.token_type 4015 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4016 4017 if token_type == TokenType.STRING: 4018 expressions = [primary] 4019 while self._match(TokenType.STRING): 4020 expressions.append(exp.Literal.string(self._prev.text)) 4021 4022 if len(expressions) > 1: 4023 return self.expression(exp.Concat, expressions=expressions) 4024 4025 return primary 4026 4027 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4028 return exp.Literal.number(f"0.{self._prev.text}") 4029 4030 if self._match(TokenType.L_PAREN): 4031 comments = self._prev_comments 4032 query = self._parse_select() 4033 4034 if query: 4035 expressions = [query] 4036 else: 4037 expressions = self._parse_expressions() 4038 4039 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4040 4041 if isinstance(this, exp.UNWRAPPED_QUERIES): 4042 this = self._parse_set_operations( 4043 self._parse_subquery(this=this, parse_alias=False) 4044 ) 4045 elif len(expressions) > 1: 4046 this = self.expression(exp.Tuple, expressions=expressions) 4047 else: 4048 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 4049 4050 if this: 4051 this.add_comments(comments) 4052 4053 self._match_r_paren(expression=this) 4054 return this 4055 4056 return None 4057 4058 def _parse_field( 4059 self, 4060 any_token: bool = False, 4061 tokens: t.Optional[t.Collection[TokenType]] = None, 4062 anonymous_func: bool = False, 4063 ) -> t.Optional[exp.Expression]: 4064 return ( 4065 self._parse_primary() 4066 or self._parse_function(anonymous=anonymous_func) 4067 or self._parse_id_var(any_token=any_token, tokens=tokens) 4068 ) 4069 4070 def _parse_function( 4071 self, 4072 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4073 anonymous: bool = False, 4074 optional_parens: bool = True, 4075 ) -> t.Optional[exp.Expression]: 4076 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4077 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4078 fn_syntax = False 4079 if ( 4080 self._match(TokenType.L_BRACE, advance=False) 4081 and self._next 4082 and self._next.text.upper() == "FN" 4083 ): 4084 self._advance(2) 4085 fn_syntax = True 4086 4087 func = self._parse_function_call( 4088 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4089 ) 4090 4091 if fn_syntax: 4092 self._match(TokenType.R_BRACE) 4093 4094 return func 4095 4096 def _parse_function_call( 4097 self, 4098 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4099 anonymous: bool = False, 4100 optional_parens: bool = True, 4101 ) -> t.Optional[exp.Expression]: 4102 if not self._curr: 4103 return None 4104 4105 comments = self._curr.comments 4106 token_type = self._curr.token_type 4107 this = self._curr.text 4108 upper = this.upper() 4109 4110 parser = 
self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4111 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4112 self._advance() 4113 return parser(self) 4114 4115 if not self._next or self._next.token_type != TokenType.L_PAREN: 4116 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4117 self._advance() 4118 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4119 4120 return None 4121 4122 if token_type not in self.FUNC_TOKENS: 4123 return None 4124 4125 self._advance(2) 4126 4127 parser = self.FUNCTION_PARSERS.get(upper) 4128 if parser and not anonymous: 4129 this = parser(self) 4130 else: 4131 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4132 4133 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4134 this = self.expression(subquery_predicate, this=self._parse_select()) 4135 self._match_r_paren() 4136 return this 4137 4138 if functions is None: 4139 functions = self.FUNCTIONS 4140 4141 function = functions.get(upper) 4142 4143 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4144 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4145 4146 if alias: 4147 args = self._kv_to_prop_eq(args) 4148 4149 if function and not anonymous: 4150 if "dialect" in function.__code__.co_varnames: 4151 func = function(args, dialect=self.dialect) 4152 else: 4153 func = function(args) 4154 4155 func = self.validate_expression(func, args) 4156 if not self.dialect.NORMALIZE_FUNCTIONS: 4157 func.meta["name"] = this 4158 4159 this = func 4160 else: 4161 if token_type == TokenType.IDENTIFIER: 4162 this = exp.Identifier(this=this, quoted=True) 4163 this = self.expression(exp.Anonymous, this=this, expressions=args) 4164 4165 if isinstance(this, exp.Expression): 4166 this.add_comments(comments) 4167 4168 self._match_r_paren(this) 4169 return self._parse_window(this) 4170 4171 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4172 transformed = [] 4173 4174 for e in expressions: 4175 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4176 if isinstance(e, exp.Alias): 4177 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4178 4179 if not isinstance(e, exp.PropertyEQ): 4180 e = self.expression( 4181 exp.PropertyEQ, this=exp.to_identifier(e.name), expression=e.expression 4182 ) 4183 4184 if isinstance(e.this, exp.Column): 4185 e.this.replace(e.this.this) 4186 4187 transformed.append(e) 4188 4189 return transformed 4190 4191 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4192 return self._parse_column_def(self._parse_id_var()) 4193 4194 def _parse_user_defined_function( 4195 self, kind: t.Optional[TokenType] = None 4196 ) -> t.Optional[exp.Expression]: 4197 this = self._parse_id_var() 4198 4199 while self._match(TokenType.DOT): 4200 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4201 4202 if not self._match(TokenType.L_PAREN): 4203 return this 4204 4205 expressions = self._parse_csv(self._parse_function_parameter) 4206 self._match_r_paren() 4207 return self.expression( 4208 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4209 ) 4210 4211 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4212 literal = self._parse_primary() 4213 if literal: 4214 return self.expression(exp.Introducer, this=token.text, expression=literal) 4215 4216 return self.expression(exp.Identifier, this=token.text) 4217 4218 def _parse_session_parameter(self) -> 
exp.SessionParameter: 4219 kind = None 4220 this = self._parse_id_var() or self._parse_primary() 4221 4222 if this and self._match(TokenType.DOT): 4223 kind = this.name 4224 this = self._parse_var() or self._parse_primary() 4225 4226 return self.expression(exp.SessionParameter, this=this, kind=kind) 4227 4228 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4229 index = self._index 4230 4231 if self._match(TokenType.L_PAREN): 4232 expressions = t.cast( 4233 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4234 ) 4235 4236 if not self._match(TokenType.R_PAREN): 4237 self._retreat(index) 4238 else: 4239 expressions = [self._parse_id_var()] 4240 4241 if self._match_set(self.LAMBDAS): 4242 return self.LAMBDAS[self._prev.token_type](self, expressions) 4243 4244 self._retreat(index) 4245 4246 this: t.Optional[exp.Expression] 4247 4248 if self._match(TokenType.DISTINCT): 4249 this = self.expression( 4250 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4251 ) 4252 else: 4253 this = self._parse_select_or_expression(alias=alias) 4254 4255 return self._parse_limit( 4256 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4257 ) 4258 4259 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4260 index = self._index 4261 4262 if not self.errors: 4263 try: 4264 if self._parse_select(nested=True): 4265 return this 4266 except ParseError: 4267 pass 4268 finally: 4269 self.errors.clear() 4270 self._retreat(index) 4271 4272 if not self._match(TokenType.L_PAREN): 4273 return this 4274 4275 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4276 4277 self._match_r_paren() 4278 return self.expression(exp.Schema, this=this, expressions=args) 4279 4280 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4281 return self._parse_column_def(self._parse_field(any_token=True)) 4282 4283 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4284 # column defs are not really columns, they're identifiers 4285 if isinstance(this, exp.Column): 4286 this = this.this 4287 4288 kind = self._parse_types(schema=True) 4289 4290 if self._match_text_seq("FOR", "ORDINALITY"): 4291 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4292 4293 constraints: t.List[exp.Expression] = [] 4294 4295 if not kind and self._match(TokenType.ALIAS): 4296 constraints.append( 4297 self.expression( 4298 exp.ComputedColumnConstraint, 4299 this=self._parse_conjunction(), 4300 persisted=self._match_text_seq("PERSISTED"), 4301 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4302 ) 4303 ) 4304 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4305 self._match(TokenType.ALIAS) 4306 constraints.append( 4307 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4308 ) 4309 4310 while True: 4311 constraint = self._parse_column_constraint() 4312 if not constraint: 4313 break 4314 constraints.append(constraint) 4315 4316 if not kind and not constraints: 4317 return this 4318 4319 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4320 4321 def _parse_auto_increment( 4322 self, 4323 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4324 start = None 4325 increment = None 4326 4327 if self._match(TokenType.L_PAREN, advance=False): 4328 args = self._parse_wrapped_csv(self._parse_bitwise) 4329 start = 
seq_get(args, 0) 4330 increment = seq_get(args, 1) 4331 elif self._match_text_seq("START"): 4332 start = self._parse_bitwise() 4333 self._match_text_seq("INCREMENT") 4334 increment = self._parse_bitwise() 4335 4336 if start and increment: 4337 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4338 4339 return exp.AutoIncrementColumnConstraint() 4340 4341 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4342 if not self._match_text_seq("REFRESH"): 4343 self._retreat(self._index - 1) 4344 return None 4345 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4346 4347 def _parse_compress(self) -> exp.CompressColumnConstraint: 4348 if self._match(TokenType.L_PAREN, advance=False): 4349 return self.expression( 4350 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4351 ) 4352 4353 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4354 4355 def _parse_generated_as_identity( 4356 self, 4357 ) -> ( 4358 exp.GeneratedAsIdentityColumnConstraint 4359 | exp.ComputedColumnConstraint 4360 | exp.GeneratedAsRowColumnConstraint 4361 ): 4362 if self._match_text_seq("BY", "DEFAULT"): 4363 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4364 this = self.expression( 4365 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4366 ) 4367 else: 4368 self._match_text_seq("ALWAYS") 4369 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4370 4371 self._match(TokenType.ALIAS) 4372 4373 if self._match_text_seq("ROW"): 4374 start = self._match_text_seq("START") 4375 if not start: 4376 self._match(TokenType.END) 4377 hidden = self._match_text_seq("HIDDEN") 4378 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4379 4380 identity = self._match_text_seq("IDENTITY") 4381 4382 if self._match(TokenType.L_PAREN): 4383 if self._match(TokenType.START_WITH): 4384 this.set("start", self._parse_bitwise()) 4385 if self._match_text_seq("INCREMENT", "BY"): 4386 this.set("increment", self._parse_bitwise()) 4387 if self._match_text_seq("MINVALUE"): 4388 this.set("minvalue", self._parse_bitwise()) 4389 if self._match_text_seq("MAXVALUE"): 4390 this.set("maxvalue", self._parse_bitwise()) 4391 4392 if self._match_text_seq("CYCLE"): 4393 this.set("cycle", True) 4394 elif self._match_text_seq("NO", "CYCLE"): 4395 this.set("cycle", False) 4396 4397 if not identity: 4398 this.set("expression", self._parse_bitwise()) 4399 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4400 args = self._parse_csv(self._parse_bitwise) 4401 this.set("start", seq_get(args, 0)) 4402 this.set("increment", seq_get(args, 1)) 4403 4404 self._match_r_paren() 4405 4406 return this 4407 4408 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4409 self._match_text_seq("LENGTH") 4410 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4411 4412 def _parse_not_constraint( 4413 self, 4414 ) -> t.Optional[exp.Expression]: 4415 if self._match_text_seq("NULL"): 4416 return self.expression(exp.NotNullColumnConstraint) 4417 if self._match_text_seq("CASESPECIFIC"): 4418 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4419 if self._match_text_seq("FOR", "REPLICATION"): 4420 return self.expression(exp.NotForReplicationColumnConstraint) 4421 return None 4422 4423 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4424 if 
self._match(TokenType.CONSTRAINT): 4425 this = self._parse_id_var() 4426 else: 4427 this = None 4428 4429 if self._match_texts(self.CONSTRAINT_PARSERS): 4430 return self.expression( 4431 exp.ColumnConstraint, 4432 this=this, 4433 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4434 ) 4435 4436 return this 4437 4438 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4439 if not self._match(TokenType.CONSTRAINT): 4440 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4441 4442 this = self._parse_id_var() 4443 expressions = [] 4444 4445 while True: 4446 constraint = self._parse_unnamed_constraint() or self._parse_function() 4447 if not constraint: 4448 break 4449 expressions.append(constraint) 4450 4451 return self.expression(exp.Constraint, this=this, expressions=expressions) 4452 4453 def _parse_unnamed_constraint( 4454 self, constraints: t.Optional[t.Collection[str]] = None 4455 ) -> t.Optional[exp.Expression]: 4456 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4457 constraints or self.CONSTRAINT_PARSERS 4458 ): 4459 return None 4460 4461 constraint = self._prev.text.upper() 4462 if constraint not in self.CONSTRAINT_PARSERS: 4463 self.raise_error(f"No parser found for schema constraint {constraint}.") 4464 4465 return self.CONSTRAINT_PARSERS[constraint](self) 4466 4467 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4468 self._match_text_seq("KEY") 4469 return self.expression( 4470 exp.UniqueColumnConstraint, 4471 this=self._parse_schema(self._parse_id_var(any_token=False)), 4472 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4473 ) 4474 4475 def _parse_key_constraint_options(self) -> t.List[str]: 4476 options = [] 4477 while True: 4478 if not self._curr: 4479 break 4480 4481 if self._match(TokenType.ON): 4482 action = None 4483 on = self._advance_any() and self._prev.text 4484 4485 if self._match_text_seq("NO", "ACTION"): 4486 action = "NO ACTION" 4487 elif self._match_text_seq("CASCADE"): 4488 action = "CASCADE" 4489 elif self._match_text_seq("RESTRICT"): 4490 action = "RESTRICT" 4491 elif self._match_pair(TokenType.SET, TokenType.NULL): 4492 action = "SET NULL" 4493 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4494 action = "SET DEFAULT" 4495 else: 4496 self.raise_error("Invalid key constraint") 4497 4498 options.append(f"ON {on} {action}") 4499 elif self._match_text_seq("NOT", "ENFORCED"): 4500 options.append("NOT ENFORCED") 4501 elif self._match_text_seq("DEFERRABLE"): 4502 options.append("DEFERRABLE") 4503 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4504 options.append("INITIALLY DEFERRED") 4505 elif self._match_text_seq("NORELY"): 4506 options.append("NORELY") 4507 elif self._match_text_seq("MATCH", "FULL"): 4508 options.append("MATCH FULL") 4509 else: 4510 break 4511 4512 return options 4513 4514 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4515 if match and not self._match(TokenType.REFERENCES): 4516 return None 4517 4518 expressions = None 4519 this = self._parse_table(schema=True) 4520 options = self._parse_key_constraint_options() 4521 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4522 4523 def _parse_foreign_key(self) -> exp.ForeignKey: 4524 expressions = self._parse_wrapped_id_vars() 4525 reference = self._parse_references() 4526 options = {} 4527 4528 while self._match(TokenType.ON): 4529 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 
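# In a foreign key definition, ON must be followed by DELETE or UPDATE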
4530 self.raise_error("Expected DELETE or UPDATE") 4531 4532 kind = self._prev.text.lower() 4533 4534 if self._match_text_seq("NO", "ACTION"): 4535 action = "NO ACTION" 4536 elif self._match(TokenType.SET): 4537 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4538 action = "SET " + self._prev.text.upper() 4539 else: 4540 self._advance() 4541 action = self._prev.text.upper() 4542 4543 options[kind] = action 4544 4545 return self.expression( 4546 exp.ForeignKey, 4547 expressions=expressions, 4548 reference=reference, 4549 **options, # type: ignore 4550 ) 4551 4552 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4553 return self._parse_field() 4554 4555 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4556 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4557 self._retreat(self._index - 1) 4558 return None 4559 4560 id_vars = self._parse_wrapped_id_vars() 4561 return self.expression( 4562 exp.PeriodForSystemTimeConstraint, 4563 this=seq_get(id_vars, 0), 4564 expression=seq_get(id_vars, 1), 4565 ) 4566 4567 def _parse_primary_key( 4568 self, wrapped_optional: bool = False, in_props: bool = False 4569 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4570 desc = ( 4571 self._match_set((TokenType.ASC, TokenType.DESC)) 4572 and self._prev.token_type == TokenType.DESC 4573 ) 4574 4575 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4576 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4577 4578 expressions = self._parse_wrapped_csv( 4579 self._parse_primary_key_part, optional=wrapped_optional 4580 ) 4581 options = self._parse_key_constraint_options() 4582 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4583 4584 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4585 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4586 4587 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4588 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4589 return this 4590 4591 bracket_kind = self._prev.token_type 4592 expressions = self._parse_csv( 4593 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4594 ) 4595 4596 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4597 self.raise_error("Expected ]") 4598 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4599 self.raise_error("Expected }") 4600 4601 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4602 if bracket_kind == TokenType.L_BRACE: 4603 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4604 elif not this or this.name.upper() == "ARRAY": 4605 this = self.expression(exp.Array, expressions=expressions) 4606 else: 4607 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4608 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4609 4610 self._add_comments(this) 4611 return self._parse_bracket(this) 4612 4613 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4614 if self._match(TokenType.COLON): 4615 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4616 return this 4617 4618 def _parse_case(self) -> t.Optional[exp.Expression]: 4619 ifs = [] 4620 default = None 4621 4622 comments = self._prev_comments 4623 expression = 
self._parse_conjunction() 4624 4625 while self._match(TokenType.WHEN): 4626 this = self._parse_conjunction() 4627 self._match(TokenType.THEN) 4628 then = self._parse_conjunction() 4629 ifs.append(self.expression(exp.If, this=this, true=then)) 4630 4631 if self._match(TokenType.ELSE): 4632 default = self._parse_conjunction() 4633 4634 if not self._match(TokenType.END): 4635 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4636 default = exp.column("interval") 4637 else: 4638 self.raise_error("Expected END after CASE", self._prev) 4639 4640 return self._parse_window( 4641 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4642 ) 4643 4644 def _parse_if(self) -> t.Optional[exp.Expression]: 4645 if self._match(TokenType.L_PAREN): 4646 args = self._parse_csv(self._parse_conjunction) 4647 this = self.validate_expression(exp.If.from_arg_list(args), args) 4648 self._match_r_paren() 4649 else: 4650 index = self._index - 1 4651 4652 if self.NO_PAREN_IF_COMMANDS and index == 0: 4653 return self._parse_as_command(self._prev) 4654 4655 condition = self._parse_conjunction() 4656 4657 if not condition: 4658 self._retreat(index) 4659 return None 4660 4661 self._match(TokenType.THEN) 4662 true = self._parse_conjunction() 4663 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4664 self._match(TokenType.END) 4665 this = self.expression(exp.If, this=condition, true=true, false=false) 4666 4667 return self._parse_window(this) 4668 4669 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4670 if not self._match_text_seq("VALUE", "FOR"): 4671 self._retreat(self._index - 1) 4672 return None 4673 4674 return self.expression( 4675 exp.NextValueFor, 4676 this=self._parse_column(), 4677 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4678 ) 4679 4680 def _parse_extract(self) -> exp.Extract: 4681 this = self._parse_function() or self._parse_var() or self._parse_type() 4682 4683 if self._match(TokenType.FROM): 4684 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4685 4686 if not self._match(TokenType.COMMA): 4687 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4688 4689 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4690 4691 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4692 this = self._parse_conjunction() 4693 4694 if not self._match(TokenType.ALIAS): 4695 if self._match(TokenType.COMMA): 4696 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4697 4698 self.raise_error("Expected AS after CAST") 4699 4700 fmt = None 4701 to = self._parse_types() 4702 4703 if self._match(TokenType.FORMAT): 4704 fmt_string = self._parse_string() 4705 fmt = self._parse_at_time_zone(fmt_string) 4706 4707 if not to: 4708 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4709 if to.this in exp.DataType.TEMPORAL_TYPES: 4710 this = self.expression( 4711 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4712 this=this, 4713 format=exp.Literal.string( 4714 format_time( 4715 fmt_string.this if fmt_string else "", 4716 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4717 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4718 ) 4719 ), 4720 ) 4721 4722 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4723 this.set("zone", fmt.args["zone"]) 4724 return this 4725 elif not to: 4726 self.raise_error("Expected 
TYPE after CAST") 4727 elif isinstance(to, exp.Identifier): 4728 to = exp.DataType.build(to.name, udt=True) 4729 elif to.this == exp.DataType.Type.CHAR: 4730 if self._match(TokenType.CHARACTER_SET): 4731 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4732 4733 return self.expression( 4734 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4735 ) 4736 4737 def _parse_string_agg(self) -> exp.Expression: 4738 if self._match(TokenType.DISTINCT): 4739 args: t.List[t.Optional[exp.Expression]] = [ 4740 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4741 ] 4742 if self._match(TokenType.COMMA): 4743 args.extend(self._parse_csv(self._parse_conjunction)) 4744 else: 4745 args = self._parse_csv(self._parse_conjunction) # type: ignore 4746 4747 index = self._index 4748 if not self._match(TokenType.R_PAREN) and args: 4749 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4750 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4751 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4752 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4753 4754 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4755 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4756 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4757 if not self._match_text_seq("WITHIN", "GROUP"): 4758 self._retreat(index) 4759 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4760 4761 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4762 order = self._parse_order(this=seq_get(args, 0)) 4763 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4764 4765 def _parse_convert( 4766 self, strict: bool, safe: t.Optional[bool] = None 4767 ) -> t.Optional[exp.Expression]: 4768 this = self._parse_bitwise() 4769 4770 if self._match(TokenType.USING): 4771 to: t.Optional[exp.Expression] = self.expression( 4772 exp.CharacterSet, this=self._parse_var() 4773 ) 4774 elif self._match(TokenType.COMMA): 4775 to = self._parse_types() 4776 else: 4777 to = None 4778 4779 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4780 4781 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4782 """ 4783 There are generally two variants of the DECODE function: 4784 4785 - DECODE(bin, charset) 4786 - DECODE(expression, search, result [, search, result] ... [, default]) 4787 4788 The second variant will always be parsed into a CASE expression. Note that NULL 4789 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4790 instead of relying on pattern matching. 
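        For example, DECODE(x, 1, 'one', NULL, 'missing', 'other') is parsed into
        the equivalent of:

            CASE
                WHEN x = 1 THEN 'one'
                WHEN x IS NULL THEN 'missing'
                ELSE 'other'
            END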
4791 """ 4792 args = self._parse_csv(self._parse_conjunction) 4793 4794 if len(args) < 3: 4795 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4796 4797 expression, *expressions = args 4798 if not expression: 4799 return None 4800 4801 ifs = [] 4802 for search, result in zip(expressions[::2], expressions[1::2]): 4803 if not search or not result: 4804 return None 4805 4806 if isinstance(search, exp.Literal): 4807 ifs.append( 4808 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4809 ) 4810 elif isinstance(search, exp.Null): 4811 ifs.append( 4812 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4813 ) 4814 else: 4815 cond = exp.or_( 4816 exp.EQ(this=expression.copy(), expression=search), 4817 exp.and_( 4818 exp.Is(this=expression.copy(), expression=exp.Null()), 4819 exp.Is(this=search.copy(), expression=exp.Null()), 4820 copy=False, 4821 ), 4822 copy=False, 4823 ) 4824 ifs.append(exp.If(this=cond, true=result)) 4825 4826 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4827 4828 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4829 self._match_text_seq("KEY") 4830 key = self._parse_column() 4831 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4832 self._match_text_seq("VALUE") 4833 value = self._parse_bitwise() 4834 4835 if not key and not value: 4836 return None 4837 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4838 4839 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4840 if not this or not self._match_text_seq("FORMAT", "JSON"): 4841 return this 4842 4843 return self.expression(exp.FormatJson, this=this) 4844 4845 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4846 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4847 for value in values: 4848 if self._match_text_seq(value, "ON", on): 4849 return f"{value} ON {on}" 4850 4851 return None 4852 4853 @t.overload 4854 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 4855 4856 @t.overload 4857 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
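# The overloads above exist purely for type checkers: Lit[False] / Lit[True]
# narrow the return type to exp.JSONObject or exp.JSONObjectAgg based on the
# value of `agg` at the call site; the implementation below handles both.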
4858 4859 def _parse_json_object(self, agg=False): 4860 star = self._parse_star() 4861 expressions = ( 4862 [star] 4863 if star 4864 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4865 ) 4866 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4867 4868 unique_keys = None 4869 if self._match_text_seq("WITH", "UNIQUE"): 4870 unique_keys = True 4871 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4872 unique_keys = False 4873 4874 self._match_text_seq("KEYS") 4875 4876 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4877 self._parse_type() 4878 ) 4879 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4880 4881 return self.expression( 4882 exp.JSONObjectAgg if agg else exp.JSONObject, 4883 expressions=expressions, 4884 null_handling=null_handling, 4885 unique_keys=unique_keys, 4886 return_type=return_type, 4887 encoding=encoding, 4888 ) 4889 4890 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4891 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4892 if not self._match_text_seq("NESTED"): 4893 this = self._parse_id_var() 4894 kind = self._parse_types(allow_identifiers=False) 4895 nested = None 4896 else: 4897 this = None 4898 kind = None 4899 nested = True 4900 4901 path = self._match_text_seq("PATH") and self._parse_string() 4902 nested_schema = nested and self._parse_json_schema() 4903 4904 return self.expression( 4905 exp.JSONColumnDef, 4906 this=this, 4907 kind=kind, 4908 path=path, 4909 nested_schema=nested_schema, 4910 ) 4911 4912 def _parse_json_schema(self) -> exp.JSONSchema: 4913 self._match_text_seq("COLUMNS") 4914 return self.expression( 4915 exp.JSONSchema, 4916 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4917 ) 4918 4919 def _parse_json_table(self) -> exp.JSONTable: 4920 this = self._parse_format_json(self._parse_bitwise()) 4921 path = self._match(TokenType.COMMA) and self._parse_string() 4922 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4923 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4924 schema = self._parse_json_schema() 4925 4926 return exp.JSONTable( 4927 this=this, 4928 schema=schema, 4929 path=path, 4930 error_handling=error_handling, 4931 empty_handling=empty_handling, 4932 ) 4933 4934 def _parse_match_against(self) -> exp.MatchAgainst: 4935 expressions = self._parse_csv(self._parse_column) 4936 4937 self._match_text_seq(")", "AGAINST", "(") 4938 4939 this = self._parse_string() 4940 4941 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4942 modifier = "IN NATURAL LANGUAGE MODE" 4943 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4944 modifier = f"{modifier} WITH QUERY EXPANSION" 4945 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4946 modifier = "IN BOOLEAN MODE" 4947 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4948 modifier = "WITH QUERY EXPANSION" 4949 else: 4950 modifier = None 4951 4952 return self.expression( 4953 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4954 ) 4955 4956 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4957 def _parse_open_json(self) -> exp.OpenJSON: 4958 this = self._parse_bitwise() 4959 path = self._match(TokenType.COMMA) and self._parse_string() 4960 4961 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4962 this = self._parse_field(any_token=True) 4963 kind = self._parse_types() 4964 path = 
self._parse_string() 4965 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4966 4967 return self.expression( 4968 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4969 ) 4970 4971 expressions = None 4972 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4973 self._match_l_paren() 4974 expressions = self._parse_csv(_parse_open_json_column_def) 4975 4976 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4977 4978 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4979 args = self._parse_csv(self._parse_bitwise) 4980 4981 if self._match(TokenType.IN): 4982 return self.expression( 4983 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4984 ) 4985 4986 if haystack_first: 4987 haystack = seq_get(args, 0) 4988 needle = seq_get(args, 1) 4989 else: 4990 needle = seq_get(args, 0) 4991 haystack = seq_get(args, 1) 4992 4993 return self.expression( 4994 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4995 ) 4996 4997 def _parse_predict(self) -> exp.Predict: 4998 self._match_text_seq("MODEL") 4999 this = self._parse_table() 5000 5001 self._match(TokenType.COMMA) 5002 self._match_text_seq("TABLE") 5003 5004 return self.expression( 5005 exp.Predict, 5006 this=this, 5007 expression=self._parse_table(), 5008 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5009 ) 5010 5011 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5012 args = self._parse_csv(self._parse_table) 5013 return exp.JoinHint(this=func_name.upper(), expressions=args) 5014 5015 def _parse_substring(self) -> exp.Substring: 5016 # Postgres supports the form: substring(string [from int] [for int]) 5017 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5018 5019 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5020 5021 if self._match(TokenType.FROM): 5022 args.append(self._parse_bitwise()) 5023 if self._match(TokenType.FOR): 5024 args.append(self._parse_bitwise()) 5025 5026 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5027 5028 def _parse_trim(self) -> exp.Trim: 5029 # https://www.w3resource.com/sql/character-functions/trim.php 5030 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5031 5032 position = None 5033 collation = None 5034 expression = None 5035 5036 if self._match_texts(self.TRIM_TYPES): 5037 position = self._prev.text.upper() 5038 5039 this = self._parse_bitwise() 5040 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5041 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5042 expression = self._parse_bitwise() 5043 5044 if invert_order: 5045 this, expression = expression, this 5046 5047 if self._match(TokenType.COLLATE): 5048 collation = self._parse_bitwise() 5049 5050 return self.expression( 5051 exp.Trim, this=this, position=position, expression=expression, collation=collation 5052 ) 5053 5054 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5055 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5056 5057 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5058 return self._parse_window(self._parse_id_var(), alias=True) 5059 5060 def _parse_respect_or_ignore_nulls( 5061 self, this: t.Optional[exp.Expression] 5062 ) -> t.Optional[exp.Expression]: 5063 if self._match_text_seq("IGNORE", "NULLS"): 5064 return self.expression(exp.IgnoreNulls, this=this) 5065 if 
self._match_text_seq("RESPECT", "NULLS"): 5066 return self.expression(exp.RespectNulls, this=this) 5067 return this 5068 5069 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5070 if self._match(TokenType.HAVING): 5071 self._match_texts(("MAX", "MIN")) 5072 max = self._prev.text.upper() != "MIN" 5073 return self.expression( 5074 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5075 ) 5076 5077 return this 5078 5079 def _parse_window( 5080 self, this: t.Optional[exp.Expression], alias: bool = False 5081 ) -> t.Optional[exp.Expression]: 5082 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5083 self._match(TokenType.WHERE) 5084 this = self.expression( 5085 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5086 ) 5087 self._match_r_paren() 5088 5089 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5090 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5091 if self._match_text_seq("WITHIN", "GROUP"): 5092 order = self._parse_wrapped(self._parse_order) 5093 this = self.expression(exp.WithinGroup, this=this, expression=order) 5094 5095 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER. 5096 # Some dialects choose to implement it and some do not. 5097 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5098 5099 # The code above in _parse_lambda handles 5100 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5101 5102 # The code below handles 5103 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5104 5105 # Oracle allows both formats 5106 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5107 # and Snowflake chose to do the same for familiarity 5108 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5109 if isinstance(this, exp.AggFunc): 5110 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5111 5112 if ignore_respect and ignore_respect is not this: 5113 ignore_respect.replace(ignore_respect.this) 5114 this = self.expression(ignore_respect.__class__, this=this) 5115 5116 this = self._parse_respect_or_ignore_nulls(this) 5117 5118 # BigQuery named windows: SELECT ... FROM t WINDOW x AS (PARTITION BY ...)
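# When alias=True we are parsing a named window definition: the window name
# has already been consumed, so there is no OVER keyword and the name is
# followed directly by AS and the parenthesized spec.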
5119 if alias: 5120 over = None 5121 self._match(TokenType.ALIAS) 5122 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5123 return this 5124 else: 5125 over = self._prev.text.upper() 5126 5127 if not self._match(TokenType.L_PAREN): 5128 return self.expression( 5129 exp.Window, this=this, alias=self._parse_id_var(False), over=over 5130 ) 5131 5132 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5133 5134 first = self._match(TokenType.FIRST) 5135 if self._match_text_seq("LAST"): 5136 first = False 5137 5138 partition, order = self._parse_partition_and_order() 5139 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5140 5141 if kind: 5142 self._match(TokenType.BETWEEN) 5143 start = self._parse_window_spec() 5144 self._match(TokenType.AND) 5145 end = self._parse_window_spec() 5146 5147 spec = self.expression( 5148 exp.WindowSpec, 5149 kind=kind, 5150 start=start["value"], 5151 start_side=start["side"], 5152 end=end["value"], 5153 end_side=end["side"], 5154 ) 5155 else: 5156 spec = None 5157 5158 self._match_r_paren() 5159 5160 window = self.expression( 5161 exp.Window, 5162 this=this, 5163 partition_by=partition, 5164 order=order, 5165 spec=spec, 5166 alias=window_alias, 5167 over=over, 5168 first=first, 5169 ) 5170 5171 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5172 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5173 return self._parse_window(window, alias=alias) 5174 5175 return window 5176 5177 def _parse_partition_and_order( 5178 self, 5179 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5180 return self._parse_partition_by(), self._parse_order() 5181 5182 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5183 self._match(TokenType.BETWEEN) 5184 5185 return { 5186 "value": ( 5187 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5188 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5189 or self._parse_bitwise() 5190 ), 5191 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5192 } 5193 5194 def _parse_alias( 5195 self, this: t.Optional[exp.Expression], explicit: bool = False 5196 ) -> t.Optional[exp.Expression]: 5197 any_token = self._match(TokenType.ALIAS) 5198 comments = self._prev_comments 5199 5200 if explicit and not any_token: 5201 return this 5202 5203 if self._match(TokenType.L_PAREN): 5204 aliases = self.expression( 5205 exp.Aliases, 5206 comments=comments, 5207 this=this, 5208 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5209 ) 5210 self._match_r_paren(aliases) 5211 return aliases 5212 5213 alias = self._parse_id_var(any_token) or ( 5214 self.STRING_ALIASES and self._parse_string_as_identifier() 5215 ) 5216 5217 if alias: 5218 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5219 column = this.this 5220 5221 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5222 if not this.comments and column and column.comments: 5223 this.comments = column.comments 5224 column.comments = None 5225 5226 return this 5227 5228 def _parse_id_var( 5229 self, 5230 any_token: bool = True, 5231 tokens: t.Optional[t.Collection[TokenType]] = None, 5232 ) -> t.Optional[exp.Expression]: 5233 identifier = self._parse_identifier() 5234 5235 if identifier: 5236 return identifier 5237 5238 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5239 quoted = self._prev.token_type == TokenType.STRING 5240 
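# A string token consumed as an identifier is marked quoted so that its
# exact text (including casing) is preserved when the identifier is
# generated back to SQL.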
return exp.Identifier(this=self._prev.text, quoted=quoted) 5241 5242 return None 5243 5244 def _parse_string(self) -> t.Optional[exp.Expression]: 5245 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 5246 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 5247 return self._parse_placeholder() 5248 5249 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5250 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5251 5252 def _parse_number(self) -> t.Optional[exp.Expression]: 5253 if self._match(TokenType.NUMBER): 5254 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 5255 return self._parse_placeholder() 5256 5257 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5258 if self._match(TokenType.IDENTIFIER): 5259 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5260 return self._parse_placeholder() 5261 5262 def _parse_var( 5263 self, 5264 any_token: bool = False, 5265 tokens: t.Optional[t.Collection[TokenType]] = None, 5266 upper: bool = False, 5267 ) -> t.Optional[exp.Expression]: 5268 if ( 5269 (any_token and self._advance_any()) 5270 or self._match(TokenType.VAR) 5271 or (self._match_set(tokens) if tokens else False) 5272 ): 5273 return self.expression( 5274 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5275 ) 5276 return self._parse_placeholder() 5277 5278 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5279 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5280 self._advance() 5281 return self._prev 5282 return None 5283 5284 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5285 return self._parse_var() or self._parse_string() 5286 5287 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5288 return self._parse_primary() or self._parse_var(any_token=True) 5289 5290 def _parse_null(self) -> t.Optional[exp.Expression]: 5291 if self._match_set(self.NULL_TOKENS): 5292 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5293 return self._parse_placeholder() 5294 5295 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5296 if self._match(TokenType.TRUE): 5297 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5298 if self._match(TokenType.FALSE): 5299 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5300 return self._parse_placeholder() 5301 5302 def _parse_star(self) -> t.Optional[exp.Expression]: 5303 if self._match(TokenType.STAR): 5304 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5305 return self._parse_placeholder() 5306 5307 def _parse_parameter(self) -> exp.Parameter: 5308 self._match(TokenType.L_BRACE) 5309 this = self._parse_identifier() or self._parse_primary_or_var() 5310 expression = self._match(TokenType.COLON) and ( 5311 self._parse_identifier() or self._parse_primary_or_var() 5312 ) 5313 self._match(TokenType.R_BRACE) 5314 return self.expression(exp.Parameter, this=this, expression=expression) 5315 5316 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5317 if self._match_set(self.PLACEHOLDER_PARSERS): 5318 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5319 if placeholder: 5320 return placeholder 5321 self._advance(-1) 5322 return None 5323 5324 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5325 if not self._match(TokenType.EXCEPT): 5326 return None 5327 if self._match(TokenType.L_PAREN, advance=False): 5328 return 
self._parse_wrapped_csv(self._parse_column) 5329 5330 except_column = self._parse_column() 5331 return [except_column] if except_column else None 5332 5333 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5334 if not self._match(TokenType.REPLACE): 5335 return None 5336 if self._match(TokenType.L_PAREN, advance=False): 5337 return self._parse_wrapped_csv(self._parse_expression) 5338 5339 replace_expression = self._parse_expression() 5340 return [replace_expression] if replace_expression else None 5341 5342 def _parse_csv( 5343 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5344 ) -> t.List[exp.Expression]: 5345 parse_result = parse_method() 5346 items = [parse_result] if parse_result is not None else [] 5347 5348 while self._match(sep): 5349 self._add_comments(parse_result) 5350 parse_result = parse_method() 5351 if parse_result is not None: 5352 items.append(parse_result) 5353 5354 return items 5355 5356 def _parse_tokens( 5357 self, parse_method: t.Callable, expressions: t.Dict 5358 ) -> t.Optional[exp.Expression]: 5359 this = parse_method() 5360 5361 while self._match_set(expressions): 5362 this = self.expression( 5363 expressions[self._prev.token_type], 5364 this=this, 5365 comments=self._prev_comments, 5366 expression=parse_method(), 5367 ) 5368 5369 return this 5370 5371 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5372 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5373 5374 def _parse_wrapped_csv( 5375 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5376 ) -> t.List[exp.Expression]: 5377 return self._parse_wrapped( 5378 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5379 ) 5380 5381 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5382 wrapped = self._match(TokenType.L_PAREN) 5383 if not wrapped and not optional: 5384 self.raise_error("Expecting (") 5385 parse_result = parse_method() 5386 if wrapped: 5387 self._match_r_paren() 5388 return parse_result 5389 5390 def _parse_expressions(self) -> t.List[exp.Expression]: 5391 return self._parse_csv(self._parse_expression) 5392 5393 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5394 return self._parse_select() or self._parse_set_operations( 5395 self._parse_expression() if alias else self._parse_conjunction() 5396 ) 5397 5398 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5399 return self._parse_query_modifiers( 5400 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5401 ) 5402 5403 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5404 this = None 5405 if self._match_texts(self.TRANSACTION_KIND): 5406 this = self._prev.text 5407 5408 self._match_texts(("TRANSACTION", "WORK")) 5409 5410 modes = [] 5411 while True: 5412 mode = [] 5413 while self._match(TokenType.VAR): 5414 mode.append(self._prev.text) 5415 5416 if mode: 5417 modes.append(" ".join(mode)) 5418 if not self._match(TokenType.COMMA): 5419 break 5420 5421 return self.expression(exp.Transaction, this=this, modes=modes) 5422 5423 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5424 chain = None 5425 savepoint = None 5426 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5427 5428 self._match_texts(("TRANSACTION", "WORK")) 5429 5430 if self._match_text_seq("TO"): 5431 self._match_text_seq("SAVEPOINT") 5432 savepoint = self._parse_id_var() 5433 5434 if 
self._match(TokenType.AND): 5435 chain = not self._match_text_seq("NO") 5436 self._match_text_seq("CHAIN") 5437 5438 if is_rollback: 5439 return self.expression(exp.Rollback, savepoint=savepoint) 5440 5441 return self.expression(exp.Commit, chain=chain) 5442 5443 def _parse_refresh(self) -> exp.Refresh: 5444 self._match(TokenType.TABLE) 5445 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5446 5447 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5448 if not self._match_text_seq("ADD"): 5449 return None 5450 5451 self._match(TokenType.COLUMN) 5452 exists_column = self._parse_exists(not_=True) 5453 expression = self._parse_field_def() 5454 5455 if expression: 5456 expression.set("exists", exists_column) 5457 5458 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5459 if self._match_texts(("FIRST", "AFTER")): 5460 position = self._prev.text 5461 column_position = self.expression( 5462 exp.ColumnPosition, this=self._parse_column(), position=position 5463 ) 5464 expression.set("position", column_position) 5465 5466 return expression 5467 5468 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5469 drop = self._match(TokenType.DROP) and self._parse_drop() 5470 if drop and not isinstance(drop, exp.Command): 5471 drop.set("kind", drop.args.get("kind", "COLUMN")) 5472 return drop 5473 5474 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5475 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5476 return self.expression( 5477 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5478 ) 5479 5480 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5481 index = self._index - 1 5482 5483 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5484 return self._parse_csv( 5485 lambda: self.expression( 5486 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5487 ) 5488 ) 5489 5490 self._retreat(index) 5491 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5492 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5493 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5494 5495 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5496 self._match(TokenType.COLUMN) 5497 column = self._parse_field(any_token=True) 5498 5499 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5500 return self.expression(exp.AlterColumn, this=column, drop=True) 5501 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5502 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5503 if self._match(TokenType.COMMENT): 5504 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5505 5506 self._match_text_seq("SET", "DATA") 5507 return self.expression( 5508 exp.AlterColumn, 5509 this=column, 5510 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5511 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5512 using=self._match(TokenType.USING) and self._parse_conjunction(), 5513 ) 5514 5515 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5516 index = self._index - 1 5517 5518 partition_exists = self._parse_exists() 5519 if self._match(TokenType.PARTITION, advance=False): 5520 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5521 5522 self._retreat(index) 5523 return 
self._parse_csv(self._parse_drop_column) 5524 5525 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5526 if self._match(TokenType.COLUMN): 5527 exists = self._parse_exists() 5528 old_column = self._parse_column() 5529 to = self._match_text_seq("TO") 5530 new_column = self._parse_column() 5531 5532 if old_column is None or to is None or new_column is None: 5533 return None 5534 5535 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5536 5537 self._match_text_seq("TO") 5538 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5539 5540 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5541 start = self._prev 5542 5543 if not self._match(TokenType.TABLE): 5544 return self._parse_as_command(start) 5545 5546 exists = self._parse_exists() 5547 only = self._match_text_seq("ONLY") 5548 this = self._parse_table(schema=True) 5549 5550 if self._next: 5551 self._advance() 5552 5553 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5554 if parser: 5555 actions = ensure_list(parser(self)) 5556 options = self._parse_csv(self._parse_property) 5557 5558 if not self._curr and actions: 5559 return self.expression( 5560 exp.AlterTable, 5561 this=this, 5562 exists=exists, 5563 actions=actions, 5564 only=only, 5565 options=options, 5566 ) 5567 5568 return self._parse_as_command(start) 5569 5570 def _parse_merge(self) -> exp.Merge: 5571 self._match(TokenType.INTO) 5572 target = self._parse_table() 5573 5574 if target and self._match(TokenType.ALIAS, advance=False): 5575 target.set("alias", self._parse_table_alias()) 5576 5577 self._match(TokenType.USING) 5578 using = self._parse_table() 5579 5580 self._match(TokenType.ON) 5581 on = self._parse_conjunction() 5582 5583 return self.expression( 5584 exp.Merge, 5585 this=target, 5586 using=using, 5587 on=on, 5588 expressions=self._parse_when_matched(), 5589 ) 5590 5591 def _parse_when_matched(self) -> t.List[exp.When]: 5592 whens = [] 5593 5594 while self._match(TokenType.WHEN): 5595 matched = not self._match(TokenType.NOT) 5596 self._match_text_seq("MATCHED") 5597 source = ( 5598 False 5599 if self._match_text_seq("BY", "TARGET") 5600 else self._match_text_seq("BY", "SOURCE") 5601 ) 5602 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5603 5604 self._match(TokenType.THEN) 5605 5606 if self._match(TokenType.INSERT): 5607 _this = self._parse_star() 5608 if _this: 5609 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5610 else: 5611 then = self.expression( 5612 exp.Insert, 5613 this=self._parse_value(), 5614 expression=self._match_text_seq("VALUES") and self._parse_value(), 5615 ) 5616 elif self._match(TokenType.UPDATE): 5617 expressions = self._parse_star() 5618 if expressions: 5619 then = self.expression(exp.Update, expressions=expressions) 5620 else: 5621 then = self.expression( 5622 exp.Update, 5623 expressions=self._match(TokenType.SET) 5624 and self._parse_csv(self._parse_equality), 5625 ) 5626 elif self._match(TokenType.DELETE): 5627 then = self.expression(exp.Var, this=self._prev.text) 5628 else: 5629 then = None 5630 5631 whens.append( 5632 self.expression( 5633 exp.When, 5634 matched=matched, 5635 source=source, 5636 condition=condition, 5637 then=then, 5638 ) 5639 ) 5640 return whens 5641 5642 def _parse_show(self) -> t.Optional[exp.Expression]: 5643 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5644 if parser: 5645 return parser(self) 5646 return 
self._parse_as_command(self._prev) 5647 5648 def _parse_set_item_assignment( 5649 self, kind: t.Optional[str] = None 5650 ) -> t.Optional[exp.Expression]: 5651 index = self._index 5652 5653 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5654 return self._parse_set_transaction(global_=kind == "GLOBAL") 5655 5656 left = self._parse_primary() or self._parse_id_var() 5657 assignment_delimiter = self._match_texts(("=", "TO")) 5658 5659 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5660 self._retreat(index) 5661 return None 5662 5663 right = self._parse_statement() or self._parse_id_var() 5664 this = self.expression(exp.EQ, this=left, expression=right) 5665 5666 return self.expression(exp.SetItem, this=this, kind=kind) 5667 5668 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5669 self._match_text_seq("TRANSACTION") 5670 characteristics = self._parse_csv( 5671 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5672 ) 5673 return self.expression( 5674 exp.SetItem, 5675 expressions=characteristics, 5676 kind="TRANSACTION", 5677 **{"global": global_}, # type: ignore 5678 ) 5679 5680 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5681 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5682 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5683 5684 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5685 index = self._index 5686 set_ = self.expression( 5687 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5688 ) 5689 5690 if self._curr: 5691 self._retreat(index) 5692 return self._parse_as_command(self._prev) 5693 5694 return set_ 5695 5696 def _parse_var_from_options( 5697 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5698 ) -> t.Optional[exp.Var]: 5699 start = self._curr 5700 if not start: 5701 return None 5702 5703 option = start.text.upper() 5704 continuations = options.get(option) 5705 5706 index = self._index 5707 self._advance() 5708 for keywords in continuations or []: 5709 if isinstance(keywords, str): 5710 keywords = (keywords,) 5711 5712 if self._match_text_seq(*keywords): 5713 option = f"{option} {' '.join(keywords)}" 5714 break 5715 else: 5716 if continuations or continuations is None: 5717 if raise_unmatched: 5718 self.raise_error(f"Unknown option {option}") 5719 5720 self._retreat(index) 5721 return None 5722 5723 return exp.var(option) 5724 5725 def _parse_as_command(self, start: Token) -> exp.Command: 5726 while self._curr: 5727 self._advance() 5728 text = self._find_sql(start, self._prev) 5729 size = len(start.text) 5730 self._warn_unsupported() 5731 return exp.Command(this=text[:size], expression=text[size:]) 5732 5733 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5734 settings = [] 5735 5736 self._match_l_paren() 5737 kind = self._parse_id_var() 5738 5739 if self._match(TokenType.L_PAREN): 5740 while True: 5741 key = self._parse_id_var() 5742 value = self._parse_primary() 5743 5744 if not key and value is None: 5745 break 5746 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5747 self._match(TokenType.R_PAREN) 5748 5749 self._match_r_paren() 5750 5751 return self.expression( 5752 exp.DictProperty, 5753 this=this, 5754 kind=kind.this if kind else None, 5755 settings=settings, 5756 ) 5757 5758 def _parse_dict_range(self, this: str) -> exp.DictRange: 5759 self._match_l_paren() 5760 has_min = 
self._match_text_seq("MIN") 5761 if has_min: 5762 min = self._parse_var() or self._parse_primary() 5763 self._match_text_seq("MAX") 5764 max = self._parse_var() or self._parse_primary() 5765 else: 5766 max = self._parse_var() or self._parse_primary() 5767 min = exp.Literal.number(0) 5768 self._match_r_paren() 5769 return self.expression(exp.DictRange, this=this, min=min, max=max) 5770 5771 def _parse_comprehension( 5772 self, this: t.Optional[exp.Expression] 5773 ) -> t.Optional[exp.Comprehension]: 5774 index = self._index 5775 expression = self._parse_column() 5776 if not self._match(TokenType.IN): 5777 self._retreat(index - 1) 5778 return None 5779 iterator = self._parse_column() 5780 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5781 return self.expression( 5782 exp.Comprehension, 5783 this=this, 5784 expression=expression, 5785 iterator=iterator, 5786 condition=condition, 5787 ) 5788 5789 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5790 if self._match(TokenType.HEREDOC_STRING): 5791 return self.expression(exp.Heredoc, this=self._prev.text) 5792 5793 if not self._match_text_seq("$"): 5794 return None 5795 5796 tags = ["$"] 5797 tag_text = None 5798 5799 if self._is_connected(): 5800 self._advance() 5801 tags.append(self._prev.text.upper()) 5802 else: 5803 self.raise_error("No closing $ found") 5804 5805 if tags[-1] != "$": 5806 if self._is_connected() and self._match_text_seq("$"): 5807 tag_text = tags[-1] 5808 tags.append("$") 5809 else: 5810 self.raise_error("No closing $ found") 5811 5812 heredoc_start = self._curr 5813 5814 while self._curr: 5815 if self._match_text_seq(*tags, advance=False): 5816 this = self._find_sql(heredoc_start, self._prev) 5817 self._advance(len(tags)) 5818 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5819 5820 self._advance() 5821 5822 self.raise_error(f"No closing {''.join(tags)} found") 5823 return None 5824 5825 def _find_parser( 5826 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5827 ) -> t.Optional[t.Callable]: 5828 if not self._curr: 5829 return None 5830 5831 index = self._index 5832 this = [] 5833 while True: 5834 # The current token might be multiple words 5835 curr = self._curr.text.upper() 5836 key = curr.split(" ") 5837 this.append(curr) 5838 5839 self._advance() 5840 result, trie = in_trie(trie, key) 5841 if result == TrieResult.FAILED: 5842 break 5843 5844 if result == TrieResult.EXISTS: 5845 subparser = parsers[" ".join(this)] 5846 return subparser 5847 5848 self._retreat(index) 5849 return None 5850 5851 def _match(self, token_type, advance=True, expression=None): 5852 if not self._curr: 5853 return None 5854 5855 if self._curr.token_type == token_type: 5856 if advance: 5857 self._advance() 5858 self._add_comments(expression) 5859 return True 5860 5861 return None 5862 5863 def _match_set(self, types, advance=True): 5864 if not self._curr: 5865 return None 5866 5867 if self._curr.token_type in types: 5868 if advance: 5869 self._advance() 5870 return True 5871 5872 return None 5873 5874 def _match_pair(self, token_type_a, token_type_b, advance=True): 5875 if not self._curr or not self._next: 5876 return None 5877 5878 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5879 if advance: 5880 self._advance(2) 5881 return True 5882 5883 return None 5884 5885 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5886 if not self._match(TokenType.L_PAREN, expression=expression): 5887 self.raise_error("Expecting (") 5888 5889 def 
_match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5890 if not self._match(TokenType.R_PAREN, expression=expression): 5891 self.raise_error("Expecting )") 5892 5893 def _match_texts(self, texts, advance=True): 5894 if self._curr and self._curr.text.upper() in texts: 5895 if advance: 5896 self._advance() 5897 return True 5898 return None 5899 5900 def _match_text_seq(self, *texts, advance=True): 5901 index = self._index 5902 for text in texts: 5903 if self._curr and self._curr.text.upper() == text: 5904 self._advance() 5905 else: 5906 self._retreat(index) 5907 return None 5908 5909 if not advance: 5910 self._retreat(index) 5911 5912 return True 5913 5914 @t.overload 5915 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: ... 5916 5917 @t.overload 5918 def _replace_columns_with_dots( 5919 self, this: t.Optional[exp.Expression] 5920 ) -> t.Optional[exp.Expression]: ... 5921 5922 def _replace_columns_with_dots(self, this): 5923 if isinstance(this, exp.Dot): 5924 exp.replace_children(this, self._replace_columns_with_dots) 5925 elif isinstance(this, exp.Column): 5926 exp.replace_children(this, self._replace_columns_with_dots) 5927 table = this.args.get("table") 5928 this = ( 5929 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5930 ) 5931 5932 return this 5933 5934 def _replace_lambda( 5935 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5936 ) -> t.Optional[exp.Expression]: 5937 if not node: 5938 return node 5939 5940 for column in node.find_all(exp.Column): 5941 if column.parts[0].name in lambda_variables: 5942 dot_or_id = column.to_dot() if column.table else column.this 5943 parent = column.parent 5944 5945 while isinstance(parent, exp.Dot): 5946 if not isinstance(parent.parent, exp.Dot): 5947 parent.replace(dot_or_id) 5948 break 5949 parent = parent.parent 5950 else: 5951 if column is node: 5952 node = dot_or_id 5953 else: 5954 column.replace(dot_or_id) 5955 return node 5956 5957 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 5958 start = self._prev 5959 5960 # Not to be confused with TRUNCATE(number, decimals) function call 5961 if self._match(TokenType.L_PAREN): 5962 self._retreat(self._index - 2) 5963 return self._parse_function() 5964 5965 # Clickhouse supports TRUNCATE DATABASE as well 5966 is_database = self._match(TokenType.DATABASE) 5967 5968 self._match(TokenType.TABLE) 5969 5970 exists = self._parse_exists(not_=False) 5971 5972 expressions = self._parse_csv( 5973 lambda: self._parse_table(schema=True, is_db_reference=is_database) 5974 ) 5975 5976 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 5977 5978 if self._match_text_seq("RESTART", "IDENTITY"): 5979 identity = "RESTART" 5980 elif self._match_text_seq("CONTINUE", "IDENTITY"): 5981 identity = "CONTINUE" 5982 else: 5983 identity = None 5984 5985 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 5986 option = self._prev.text 5987 else: 5988 option = None 5989 5990 partition = self._parse_partition() 5991 5992 # Fallback case 5993 if self._curr: 5994 return self._parse_as_command(start) 5995 5996 return self.expression( 5997 exp.TruncateTable, 5998 expressions=expressions, 5999 is_database=is_database, 6000 exists=exists, 6001 cluster=cluster, 6002 identity=identity, 6003 option=option, 6004 partition=partition, 6005 )
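Example: the window-parsing code above normalizes both placements of IGNORE | RESPECT NULLS into one tree shape, with the exp.IgnoreNulls / exp.RespectNulls node wrapping the aggregate inside the exp.Window. A minimal sketch using the public parse_one helper against the default dialect; the assertions reflect a reading of _parse_window and _parse_lambda above, not a documented guarantee:

    import sqlglot
    from sqlglot import exp

    # IGNORE NULLS inside the parentheses is handled in _parse_lambda ...
    inside = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
    # ... while IGNORE NULLS after them is handled in _parse_window
    outside = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")

    for tree in (inside, outside):
        window = tree.find(exp.Window)
        assert window is not None
        # both variants should normalize to IgnoreNulls wrapping the aggregate
        assert isinstance(window.this, exp.IgnoreNulls)
        assert isinstance(window.this.this, exp.FirstValue)

Normalizing to a single shape means downstream transforms and the SQL generator only have to handle one representation of the clause.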
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
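For illustration, build_var_map can be exercised directly with hand-built expression nodes; the argument list alternates keys and values, exactly as a MAP('a', 1, 'b', 2) call would supply them:

    from sqlglot import exp
    from sqlglot.parser import build_var_map

    # alternating key/value arguments, as produced by parsing MAP('a', 1, 'b', 2)
    args = [
        exp.Literal.string("a"),
        exp.Literal.number(1),
        exp.Literal.string("b"),
        exp.Literal.number(2),
    ]
    node = build_var_map(args)
    assert isinstance(node, exp.VarMap)  # keys and values packed into two arrays

    # a single star argument short-circuits into a StarMap
    assert isinstance(build_var_map([exp.Star()]), exp.StarMap)

Note that copy=False in the exp.array calls avoids copying the already-parsed argument expressions.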
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
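A sketch of the two branches, using the default dialect (where LOG_BASE_FIRST is True, matching the "base, expression" comment) obtained via Dialect.get_or_raise, the same way Parser.__init__ resolves its dialect:

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect
    from sqlglot.parser import build_logarithm

    dialect = Dialect.get_or_raise(None)  # the default dialect

    # two arguments: base first, so this=2 and expression=64
    two_args = build_logarithm([exp.Literal.number(2), exp.Literal.number(64)], dialect)
    assert isinstance(two_args, exp.Log)

    # one argument: exp.Log, or exp.Ln on dialects whose parser sets LOG_DEFAULTS_TO_LN
    one_arg = build_logarithm([exp.Literal.number(64)], dialect)
    assert isinstance(one_arg, (exp.Log, exp.Ln))

Dialects that write LOG(expression, base) instead set LOG_BASE_FIRST = False, which makes the builder swap the two arguments.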
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
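These builders are registered under JSON_EXTRACT and friends in the FUNCTIONS table, so the easiest way to observe them is through a parse. A sketch against the default dialect, which per that table routes JSON_EXTRACT through build_extract_json_with_path (individual dialects may override this):

    from sqlglot import exp, parse_one

    ast = parse_one("SELECT JSON_EXTRACT(doc, '$.name', '$.age') FROM t")
    node = ast.find(exp.JSONExtract)
    assert node is not None

    # the second argument went through dialect.to_json_path; any further paths
    # land in the "expressions" arg via the len(args) > 2 branch above
    extra_paths = node.args.get("expressions")
    assert extra_paths and len(extra_paths) == 1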
86class Parser(metaclass=_Parser): 87 """ 88 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 89 90 Args: 91 error_level: The desired error level. 92 Default: ErrorLevel.IMMEDIATE 93 error_message_context: The amount of context to capture from a query string when displaying 94 the error message (in number of characters). 95 Default: 100 96 max_errors: Maximum number of error messages to include in a raised ParseError. 97 This is only relevant if error_level is ErrorLevel.RAISE. 98 Default: 3 99 """ 100 101 FUNCTIONS: t.Dict[str, t.Callable] = { 102 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 103 "CONCAT": lambda args, dialect: exp.Concat( 104 expressions=args, 105 safe=not dialect.STRICT_STRING_CONCAT, 106 coalesce=dialect.CONCAT_COALESCE, 107 ), 108 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 109 expressions=args, 110 safe=not dialect.STRICT_STRING_CONCAT, 111 coalesce=dialect.CONCAT_COALESCE, 112 ), 113 "DATE_TO_DATE_STR": lambda args: exp.Cast( 114 this=seq_get(args, 0), 115 to=exp.DataType(this=exp.DataType.Type.TEXT), 116 ), 117 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 118 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 119 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 120 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 121 "LIKE": build_like, 122 "LOG": build_logarithm, 123 "TIME_TO_TIME_STR": lambda args: exp.Cast( 124 this=seq_get(args, 0), 125 to=exp.DataType(this=exp.DataType.Type.TEXT), 126 ), 127 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 128 this=exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 start=exp.Literal.number(1), 133 length=exp.Literal.number(10), 134 ), 135 "VAR_MAP": build_var_map, 136 } 137 138 NO_PAREN_FUNCTIONS = { 139 TokenType.CURRENT_DATE: exp.CurrentDate, 140 TokenType.CURRENT_DATETIME: exp.CurrentDate, 141 TokenType.CURRENT_TIME: exp.CurrentTime, 142 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 143 TokenType.CURRENT_USER: exp.CurrentUser, 144 } 145 146 STRUCT_TYPE_TOKENS = { 147 TokenType.NESTED, 148 TokenType.STRUCT, 149 } 150 151 NESTED_TYPE_TOKENS = { 152 TokenType.ARRAY, 153 TokenType.LOWCARDINALITY, 154 TokenType.MAP, 155 TokenType.NULLABLE, 156 *STRUCT_TYPE_TOKENS, 157 } 158 159 ENUM_TYPE_TOKENS = { 160 TokenType.ENUM, 161 TokenType.ENUM8, 162 TokenType.ENUM16, 163 } 164 165 AGGREGATE_TYPE_TOKENS = { 166 TokenType.AGGREGATEFUNCTION, 167 TokenType.SIMPLEAGGREGATEFUNCTION, 168 } 169 170 TYPE_TOKENS = { 171 TokenType.BIT, 172 TokenType.BOOLEAN, 173 TokenType.TINYINT, 174 TokenType.UTINYINT, 175 TokenType.SMALLINT, 176 TokenType.USMALLINT, 177 TokenType.INT, 178 TokenType.UINT, 179 TokenType.BIGINT, 180 TokenType.UBIGINT, 181 TokenType.INT128, 182 TokenType.UINT128, 183 TokenType.INT256, 184 TokenType.UINT256, 185 TokenType.MEDIUMINT, 186 TokenType.UMEDIUMINT, 187 TokenType.FIXEDSTRING, 188 TokenType.FLOAT, 189 TokenType.DOUBLE, 190 TokenType.CHAR, 191 TokenType.NCHAR, 192 TokenType.VARCHAR, 193 TokenType.NVARCHAR, 194 TokenType.BPCHAR, 195 TokenType.TEXT, 196 TokenType.MEDIUMTEXT, 197 TokenType.LONGTEXT, 198 TokenType.MEDIUMBLOB, 199 TokenType.LONGBLOB, 200 TokenType.BINARY, 201 TokenType.VARBINARY, 202 TokenType.JSON, 203 TokenType.JSONB, 204 TokenType.INTERVAL, 205 TokenType.TINYBLOB, 206 TokenType.TINYTEXT, 207 TokenType.TIME, 208 TokenType.TIMETZ, 209 TokenType.TIMESTAMP, 210 
TokenType.TIMESTAMP_S, 211 TokenType.TIMESTAMP_MS, 212 TokenType.TIMESTAMP_NS, 213 TokenType.TIMESTAMPTZ, 214 TokenType.TIMESTAMPLTZ, 215 TokenType.DATETIME, 216 TokenType.DATETIME64, 217 TokenType.DATE, 218 TokenType.DATE32, 219 TokenType.INT4RANGE, 220 TokenType.INT4MULTIRANGE, 221 TokenType.INT8RANGE, 222 TokenType.INT8MULTIRANGE, 223 TokenType.NUMRANGE, 224 TokenType.NUMMULTIRANGE, 225 TokenType.TSRANGE, 226 TokenType.TSMULTIRANGE, 227 TokenType.TSTZRANGE, 228 TokenType.TSTZMULTIRANGE, 229 TokenType.DATERANGE, 230 TokenType.DATEMULTIRANGE, 231 TokenType.DECIMAL, 232 TokenType.UDECIMAL, 233 TokenType.BIGDECIMAL, 234 TokenType.UUID, 235 TokenType.GEOGRAPHY, 236 TokenType.GEOMETRY, 237 TokenType.HLLSKETCH, 238 TokenType.HSTORE, 239 TokenType.PSEUDO_TYPE, 240 TokenType.SUPER, 241 TokenType.SERIAL, 242 TokenType.SMALLSERIAL, 243 TokenType.BIGSERIAL, 244 TokenType.XML, 245 TokenType.YEAR, 246 TokenType.UNIQUEIDENTIFIER, 247 TokenType.USERDEFINED, 248 TokenType.MONEY, 249 TokenType.SMALLMONEY, 250 TokenType.ROWVERSION, 251 TokenType.IMAGE, 252 TokenType.VARIANT, 253 TokenType.OBJECT, 254 TokenType.OBJECT_IDENTIFIER, 255 TokenType.INET, 256 TokenType.IPADDRESS, 257 TokenType.IPPREFIX, 258 TokenType.IPV4, 259 TokenType.IPV6, 260 TokenType.UNKNOWN, 261 TokenType.NULL, 262 *ENUM_TYPE_TOKENS, 263 *NESTED_TYPE_TOKENS, 264 *AGGREGATE_TYPE_TOKENS, 265 } 266 267 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 268 TokenType.BIGINT: TokenType.UBIGINT, 269 TokenType.INT: TokenType.UINT, 270 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 271 TokenType.SMALLINT: TokenType.USMALLINT, 272 TokenType.TINYINT: TokenType.UTINYINT, 273 TokenType.DECIMAL: TokenType.UDECIMAL, 274 } 275 276 SUBQUERY_PREDICATES = { 277 TokenType.ANY: exp.Any, 278 TokenType.ALL: exp.All, 279 TokenType.EXISTS: exp.Exists, 280 TokenType.SOME: exp.Any, 281 } 282 283 RESERVED_TOKENS = { 284 *Tokenizer.SINGLE_TOKENS.values(), 285 TokenType.SELECT, 286 } 287 288 DB_CREATABLES = { 289 TokenType.DATABASE, 290 TokenType.SCHEMA, 291 TokenType.TABLE, 292 TokenType.VIEW, 293 TokenType.MODEL, 294 TokenType.DICTIONARY, 295 TokenType.STORAGE_INTEGRATION, 296 } 297 298 CREATABLES = { 299 TokenType.COLUMN, 300 TokenType.CONSTRAINT, 301 TokenType.FUNCTION, 302 TokenType.INDEX, 303 TokenType.PROCEDURE, 304 TokenType.FOREIGN_KEY, 305 *DB_CREATABLES, 306 } 307 308 # Tokens that can represent identifiers 309 ID_VAR_TOKENS = { 310 TokenType.VAR, 311 TokenType.ANTI, 312 TokenType.APPLY, 313 TokenType.ASC, 314 TokenType.AUTO_INCREMENT, 315 TokenType.BEGIN, 316 TokenType.BPCHAR, 317 TokenType.CACHE, 318 TokenType.CASE, 319 TokenType.COLLATE, 320 TokenType.COMMAND, 321 TokenType.COMMENT, 322 TokenType.COMMIT, 323 TokenType.CONSTRAINT, 324 TokenType.DEFAULT, 325 TokenType.DELETE, 326 TokenType.DESC, 327 TokenType.DESCRIBE, 328 TokenType.DICTIONARY, 329 TokenType.DIV, 330 TokenType.END, 331 TokenType.EXECUTE, 332 TokenType.ESCAPE, 333 TokenType.FALSE, 334 TokenType.FIRST, 335 TokenType.FILTER, 336 TokenType.FINAL, 337 TokenType.FORMAT, 338 TokenType.FULL, 339 TokenType.IS, 340 TokenType.ISNULL, 341 TokenType.INTERVAL, 342 TokenType.KEEP, 343 TokenType.KILL, 344 TokenType.LEFT, 345 TokenType.LOAD, 346 TokenType.MERGE, 347 TokenType.NATURAL, 348 TokenType.NEXT, 349 TokenType.OFFSET, 350 TokenType.OPERATOR, 351 TokenType.ORDINALITY, 352 TokenType.OVERLAPS, 353 TokenType.OVERWRITE, 354 TokenType.PARTITION, 355 TokenType.PERCENT, 356 TokenType.PIVOT, 357 TokenType.PRAGMA, 358 TokenType.RANGE, 359 TokenType.RECURSIVE, 360 TokenType.REFERENCES, 361 TokenType.REFRESH, 362 
TokenType.REPLACE, 363 TokenType.RIGHT, 364 TokenType.ROW, 365 TokenType.ROWS, 366 TokenType.SEMI, 367 TokenType.SET, 368 TokenType.SETTINGS, 369 TokenType.SHOW, 370 TokenType.TEMPORARY, 371 TokenType.TOP, 372 TokenType.TRUE, 373 TokenType.TRUNCATE, 374 TokenType.UNIQUE, 375 TokenType.UNPIVOT, 376 TokenType.UPDATE, 377 TokenType.USE, 378 TokenType.VOLATILE, 379 TokenType.WINDOW, 380 *CREATABLES, 381 *SUBQUERY_PREDICATES, 382 *TYPE_TOKENS, 383 *NO_PAREN_FUNCTIONS, 384 } 385 386 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 387 388 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 389 TokenType.ANTI, 390 TokenType.APPLY, 391 TokenType.ASOF, 392 TokenType.FULL, 393 TokenType.LEFT, 394 TokenType.LOCK, 395 TokenType.NATURAL, 396 TokenType.OFFSET, 397 TokenType.RIGHT, 398 TokenType.SEMI, 399 TokenType.WINDOW, 400 } 401 402 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 403 404 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 405 406 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 407 408 FUNC_TOKENS = { 409 TokenType.COLLATE, 410 TokenType.COMMAND, 411 TokenType.CURRENT_DATE, 412 TokenType.CURRENT_DATETIME, 413 TokenType.CURRENT_TIMESTAMP, 414 TokenType.CURRENT_TIME, 415 TokenType.CURRENT_USER, 416 TokenType.FILTER, 417 TokenType.FIRST, 418 TokenType.FORMAT, 419 TokenType.GLOB, 420 TokenType.IDENTIFIER, 421 TokenType.INDEX, 422 TokenType.ISNULL, 423 TokenType.ILIKE, 424 TokenType.INSERT, 425 TokenType.LIKE, 426 TokenType.MERGE, 427 TokenType.OFFSET, 428 TokenType.PRIMARY_KEY, 429 TokenType.RANGE, 430 TokenType.REPLACE, 431 TokenType.RLIKE, 432 TokenType.ROW, 433 TokenType.UNNEST, 434 TokenType.VAR, 435 TokenType.LEFT, 436 TokenType.RIGHT, 437 TokenType.DATE, 438 TokenType.DATETIME, 439 TokenType.TABLE, 440 TokenType.TIMESTAMP, 441 TokenType.TIMESTAMPTZ, 442 TokenType.TRUNCATE, 443 TokenType.WINDOW, 444 TokenType.XOR, 445 *TYPE_TOKENS, 446 *SUBQUERY_PREDICATES, 447 } 448 449 CONJUNCTION = { 450 TokenType.AND: exp.And, 451 TokenType.OR: exp.Or, 452 } 453 454 EQUALITY = { 455 TokenType.COLON_EQ: exp.PropertyEQ, 456 TokenType.EQ: exp.EQ, 457 TokenType.NEQ: exp.NEQ, 458 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 459 } 460 461 COMPARISON = { 462 TokenType.GT: exp.GT, 463 TokenType.GTE: exp.GTE, 464 TokenType.LT: exp.LT, 465 TokenType.LTE: exp.LTE, 466 } 467 468 BITWISE = { 469 TokenType.AMP: exp.BitwiseAnd, 470 TokenType.CARET: exp.BitwiseXor, 471 TokenType.PIPE: exp.BitwiseOr, 472 } 473 474 TERM = { 475 TokenType.DASH: exp.Sub, 476 TokenType.PLUS: exp.Add, 477 TokenType.MOD: exp.Mod, 478 TokenType.COLLATE: exp.Collate, 479 } 480 481 FACTOR = { 482 TokenType.DIV: exp.IntDiv, 483 TokenType.LR_ARROW: exp.Distance, 484 TokenType.SLASH: exp.Div, 485 TokenType.STAR: exp.Mul, 486 } 487 488 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 489 490 TIMES = { 491 TokenType.TIME, 492 TokenType.TIMETZ, 493 } 494 495 TIMESTAMPS = { 496 TokenType.TIMESTAMP, 497 TokenType.TIMESTAMPTZ, 498 TokenType.TIMESTAMPLTZ, 499 *TIMES, 500 } 501 502 SET_OPERATIONS = { 503 TokenType.UNION, 504 TokenType.INTERSECT, 505 TokenType.EXCEPT, 506 } 507 508 JOIN_METHODS = { 509 TokenType.NATURAL, 510 TokenType.ASOF, 511 } 512 513 JOIN_SIDES = { 514 TokenType.LEFT, 515 TokenType.RIGHT, 516 TokenType.FULL, 517 } 518 519 JOIN_KINDS = { 520 TokenType.INNER, 521 TokenType.OUTER, 522 TokenType.CROSS, 523 TokenType.SEMI, 524 TokenType.ANTI, 525 } 526 527 JOIN_HINTS: t.Set[str] = set() 528 529 LAMBDAS = { 530 TokenType.ARROW: lambda self, expressions: self.expression( 531 exp.Lambda, 532 this=self._replace_lambda( 533 
self._parse_conjunction(), 534 {node.name for node in expressions}, 535 ), 536 expressions=expressions, 537 ), 538 TokenType.FARROW: lambda self, expressions: self.expression( 539 exp.Kwarg, 540 this=exp.var(expressions[0].name), 541 expression=self._parse_conjunction(), 542 ), 543 } 544 545 COLUMN_OPERATORS = { 546 TokenType.DOT: None, 547 TokenType.DCOLON: lambda self, this, to: self.expression( 548 exp.Cast if self.STRICT_CAST else exp.TryCast, 549 this=this, 550 to=to, 551 ), 552 TokenType.ARROW: lambda self, this, path: self.expression( 553 exp.JSONExtract, 554 this=this, 555 expression=self.dialect.to_json_path(path), 556 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 557 ), 558 TokenType.DARROW: lambda self, this, path: self.expression( 559 exp.JSONExtractScalar, 560 this=this, 561 expression=self.dialect.to_json_path(path), 562 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 563 ), 564 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 565 exp.JSONBExtract, 566 this=this, 567 expression=path, 568 ), 569 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 570 exp.JSONBExtractScalar, 571 this=this, 572 expression=path, 573 ), 574 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 575 exp.JSONBContains, 576 this=this, 577 expression=key, 578 ), 579 } 580 581 EXPRESSION_PARSERS = { 582 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 583 exp.Column: lambda self: self._parse_column(), 584 exp.Condition: lambda self: self._parse_conjunction(), 585 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 586 exp.Expression: lambda self: self._parse_expression(), 587 exp.From: lambda self: self._parse_from(), 588 exp.Group: lambda self: self._parse_group(), 589 exp.Having: lambda self: self._parse_having(), 590 exp.Identifier: lambda self: self._parse_id_var(), 591 exp.Join: lambda self: self._parse_join(), 592 exp.Lambda: lambda self: self._parse_lambda(), 593 exp.Lateral: lambda self: self._parse_lateral(), 594 exp.Limit: lambda self: self._parse_limit(), 595 exp.Offset: lambda self: self._parse_offset(), 596 exp.Order: lambda self: self._parse_order(), 597 exp.Ordered: lambda self: self._parse_ordered(), 598 exp.Properties: lambda self: self._parse_properties(), 599 exp.Qualify: lambda self: self._parse_qualify(), 600 exp.Returning: lambda self: self._parse_returning(), 601 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 602 exp.Table: lambda self: self._parse_table_parts(), 603 exp.TableAlias: lambda self: self._parse_table_alias(), 604 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 605 exp.Where: lambda self: self._parse_where(), 606 exp.Window: lambda self: self._parse_named_window(), 607 exp.With: lambda self: self._parse_with(), 608 "JOIN_TYPE": lambda self: self._parse_join_parts(), 609 } 610 611 STATEMENT_PARSERS = { 612 TokenType.ALTER: lambda self: self._parse_alter(), 613 TokenType.BEGIN: lambda self: self._parse_transaction(), 614 TokenType.CACHE: lambda self: self._parse_cache(), 615 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 616 TokenType.COMMENT: lambda self: self._parse_comment(), 617 TokenType.CREATE: lambda self: self._parse_create(), 618 TokenType.DELETE: lambda self: self._parse_delete(), 619 TokenType.DESC: lambda self: self._parse_describe(), 620 TokenType.DESCRIBE: lambda self: self._parse_describe(), 621 TokenType.DROP: lambda self: self._parse_drop(), 622 TokenType.INSERT: lambda self: self._parse_insert(), 623 
TokenType.KILL: lambda self: self._parse_kill(), 624 TokenType.LOAD: lambda self: self._parse_load(), 625 TokenType.MERGE: lambda self: self._parse_merge(), 626 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 627 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 628 TokenType.REFRESH: lambda self: self._parse_refresh(), 629 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 630 TokenType.SET: lambda self: self._parse_set(), 631 TokenType.UNCACHE: lambda self: self._parse_uncache(), 632 TokenType.UPDATE: lambda self: self._parse_update(), 633 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 634 TokenType.USE: lambda self: self.expression( 635 exp.Use, 636 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 637 this=self._parse_table(schema=False), 638 ), 639 } 640 641 UNARY_PARSERS = { 642 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 643 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 644 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 645 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 646 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 647 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 648 } 649 650 PRIMARY_PARSERS = { 651 TokenType.STRING: lambda self, token: self.expression( 652 exp.Literal, this=token.text, is_string=True 653 ), 654 TokenType.NUMBER: lambda self, token: self.expression( 655 exp.Literal, this=token.text, is_string=False 656 ), 657 TokenType.STAR: lambda self, _: self.expression( 658 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 659 ), 660 TokenType.NULL: lambda self, _: self.expression(exp.Null), 661 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 662 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 663 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 664 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 665 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 666 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 667 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 668 exp.National, this=token.text 669 ), 670 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 671 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 672 exp.RawString, this=token.text 673 ), 674 TokenType.UNICODE_STRING: lambda self, token: self.expression( 675 exp.UnicodeString, 676 this=token.text, 677 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 678 ), 679 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 680 } 681 682 PLACEHOLDER_PARSERS = { 683 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 684 TokenType.PARAMETER: lambda self: self._parse_parameter(), 685 TokenType.COLON: lambda self: ( 686 self.expression(exp.Placeholder, this=self._prev.text) 687 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 688 else None 689 ), 690 } 691 692 RANGE_PARSERS = { 693 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 694 TokenType.GLOB: binary_range_parser(exp.Glob), 695 TokenType.ILIKE: 
binary_range_parser(exp.ILike), 696 TokenType.IN: lambda self, this: self._parse_in(this), 697 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 698 TokenType.IS: lambda self, this: self._parse_is(this), 699 TokenType.LIKE: binary_range_parser(exp.Like), 700 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 701 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 702 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 703 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 704 } 705 706 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 707 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 708 "AUTO": lambda self: self._parse_auto_property(), 709 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 710 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 711 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 712 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 713 "CHECKSUM": lambda self: self._parse_checksum(), 714 "CLUSTER BY": lambda self: self._parse_cluster(), 715 "CLUSTERED": lambda self: self._parse_clustered_by(), 716 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 717 exp.CollateProperty, **kwargs 718 ), 719 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 720 "CONTAINS": lambda self: self._parse_contains_property(), 721 "COPY": lambda self: self._parse_copy_property(), 722 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 723 "DEFINER": lambda self: self._parse_definer(), 724 "DETERMINISTIC": lambda self: self.expression( 725 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 726 ), 727 "DISTKEY": lambda self: self._parse_distkey(), 728 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 729 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 730 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 731 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 732 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 733 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 734 "FREESPACE": lambda self: self._parse_freespace(), 735 "HEAP": lambda self: self.expression(exp.HeapProperty), 736 "IMMUTABLE": lambda self: self.expression( 737 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 738 ), 739 "INHERITS": lambda self: self.expression( 740 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 741 ), 742 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 743 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 744 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 745 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 746 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 747 "LIKE": lambda self: self._parse_create_like(), 748 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 749 "LOCK": lambda self: self._parse_locking(), 750 "LOCKING": lambda self: self._parse_locking(), 751 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 752 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 753 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 754 "MODIFIES": lambda self: 
self._parse_modifies_property(), 755 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 756 "NO": lambda self: self._parse_no_property(), 757 "ON": lambda self: self._parse_on_property(), 758 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 759 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 760 "PARTITION": lambda self: self._parse_partitioned_of(), 761 "PARTITION BY": lambda self: self._parse_partitioned_by(), 762 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 763 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 764 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 765 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 766 "READS": lambda self: self._parse_reads_property(), 767 "REMOTE": lambda self: self._parse_remote_with_connection(), 768 "RETURNS": lambda self: self._parse_returns(), 769 "ROW": lambda self: self._parse_row(), 770 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 771 "SAMPLE": lambda self: self.expression( 772 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 773 ), 774 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 775 "SETTINGS": lambda self: self.expression( 776 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 777 ), 778 "SORTKEY": lambda self: self._parse_sortkey(), 779 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 780 "STABLE": lambda self: self.expression( 781 exp.StabilityProperty, this=exp.Literal.string("STABLE") 782 ), 783 "STORED": lambda self: self._parse_stored(), 784 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 785 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 786 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 787 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 788 "TO": lambda self: self._parse_to_table(), 789 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 790 "TRANSFORM": lambda self: self.expression( 791 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 792 ), 793 "TTL": lambda self: self._parse_ttl(), 794 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 795 "VOLATILE": lambda self: self._parse_volatile_property(), 796 "WITH": lambda self: self._parse_with_property(), 797 } 798 799 CONSTRAINT_PARSERS = { 800 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 801 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 802 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 803 "CHARACTER SET": lambda self: self.expression( 804 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 805 ), 806 "CHECK": lambda self: self.expression( 807 exp.CheckColumnConstraint, 808 this=self._parse_wrapped(self._parse_conjunction), 809 enforced=self._match_text_seq("ENFORCED"), 810 ), 811 "COLLATE": lambda self: self.expression( 812 exp.CollateColumnConstraint, this=self._parse_var() 813 ), 814 "COMMENT": lambda self: self.expression( 815 exp.CommentColumnConstraint, this=self._parse_string() 816 ), 817 "COMPRESS": lambda self: self._parse_compress(), 818 "CLUSTERED": lambda self: self.expression( 819 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 820 ), 821 "NONCLUSTERED": lambda self: self.expression( 822 exp.NonClusteredColumnConstraint, 
this=self._parse_wrapped_csv(self._parse_ordered) 823 ), 824 "DEFAULT": lambda self: self.expression( 825 exp.DefaultColumnConstraint, this=self._parse_bitwise() 826 ), 827 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 828 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 829 "FORMAT": lambda self: self.expression( 830 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 831 ), 832 "GENERATED": lambda self: self._parse_generated_as_identity(), 833 "IDENTITY": lambda self: self._parse_auto_increment(), 834 "INLINE": lambda self: self._parse_inline(), 835 "LIKE": lambda self: self._parse_create_like(), 836 "NOT": lambda self: self._parse_not_constraint(), 837 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 838 "ON": lambda self: ( 839 self._match(TokenType.UPDATE) 840 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 841 ) 842 or self.expression(exp.OnProperty, this=self._parse_id_var()), 843 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 844 "PERIOD": lambda self: self._parse_period_for_system_time(), 845 "PRIMARY KEY": lambda self: self._parse_primary_key(), 846 "REFERENCES": lambda self: self._parse_references(match=False), 847 "TITLE": lambda self: self.expression( 848 exp.TitleColumnConstraint, this=self._parse_var_or_string() 849 ), 850 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 851 "UNIQUE": lambda self: self._parse_unique(), 852 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 853 "WITH": lambda self: self.expression( 854 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 855 ), 856 } 857 858 ALTER_PARSERS = { 859 "ADD": lambda self: self._parse_alter_table_add(), 860 "ALTER": lambda self: self._parse_alter_table_alter(), 861 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 862 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 863 "DROP": lambda self: self._parse_alter_table_drop(), 864 "RENAME": lambda self: self._parse_alter_table_rename(), 865 } 866 867 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 868 869 NO_PAREN_FUNCTION_PARSERS = { 870 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 871 "CASE": lambda self: self._parse_case(), 872 "IF": lambda self: self._parse_if(), 873 "NEXT": lambda self: self._parse_next_value_for(), 874 } 875 876 INVALID_FUNC_NAME_TOKENS = { 877 TokenType.IDENTIFIER, 878 TokenType.STRING, 879 } 880 881 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 882 883 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 884 885 FUNCTION_PARSERS = { 886 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 887 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 888 "DECODE": lambda self: self._parse_decode(), 889 "EXTRACT": lambda self: self._parse_extract(), 890 "JSON_OBJECT": lambda self: self._parse_json_object(), 891 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 892 "JSON_TABLE": lambda self: self._parse_json_table(), 893 "MATCH": lambda self: self._parse_match_against(), 894 "OPENJSON": lambda self: self._parse_open_json(), 895 "POSITION": lambda self: self._parse_position(), 896 "PREDICT": lambda self: self._parse_predict(), 897 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 898 "STRING_AGG": lambda self: 
self._parse_string_agg(), 899 "SUBSTRING": lambda self: self._parse_substring(), 900 "TRIM": lambda self: self._parse_trim(), 901 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 902 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 903 } 904 905 QUERY_MODIFIER_PARSERS = { 906 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 907 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 908 TokenType.WHERE: lambda self: ("where", self._parse_where()), 909 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 910 TokenType.HAVING: lambda self: ("having", self._parse_having()), 911 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 912 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 913 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 914 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 915 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 916 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 917 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 918 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 919 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 920 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 921 TokenType.CLUSTER_BY: lambda self: ( 922 "cluster", 923 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 924 ), 925 TokenType.DISTRIBUTE_BY: lambda self: ( 926 "distribute", 927 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 928 ), 929 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 930 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 931 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 932 } 933 934 SET_PARSERS = { 935 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 936 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 937 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 938 "TRANSACTION": lambda self: self._parse_set_transaction(), 939 } 940 941 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 942 943 TYPE_LITERAL_PARSERS = { 944 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 945 } 946 947 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 948 949 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 950 951 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 952 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 953 "ISOLATION": ( 954 ("LEVEL", "REPEATABLE", "READ"), 955 ("LEVEL", "READ", "COMMITTED"), 956 ("LEVEL", "READ", "UNCOMMITTED"), 957 ("LEVEL", "SERIALIZABLE"), 958 ), 959 "READ": ("WRITE", "ONLY"), 960 } 961 962 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 963 964 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 965 966 CLONE_KEYWORDS = {"CLONE", "COPY"} 967 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 968 969 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 970 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 971 972 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 973 974 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 975 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 976 WINDOW_SIDES = {"FOLLOWING", 
"PRECEDING"} 977 978 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 979 980 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 981 982 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 983 984 DISTINCT_TOKENS = {TokenType.DISTINCT} 985 986 NULL_TOKENS = {TokenType.NULL} 987 988 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 989 990 STRICT_CAST = True 991 992 PREFIXED_PIVOT_COLUMNS = False 993 IDENTIFY_PIVOT_STRINGS = False 994 995 LOG_DEFAULTS_TO_LN = False 996 997 # Whether ADD is present for each column added by ALTER TABLE 998 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 999 1000 # Whether the table sample clause expects CSV syntax 1001 TABLESAMPLE_CSV = False 1002 1003 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1004 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1005 1006 # Whether the TRIM function expects the characters to trim as its first argument 1007 TRIM_PATTERN_FIRST = False 1008 1009 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1010 STRING_ALIASES = False 1011 1012 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1013 MODIFIERS_ATTACHED_TO_UNION = True 1014 UNION_MODIFIERS = {"order", "limit", "offset"} 1015 1016 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1017 NO_PAREN_IF_COMMANDS = True 1018 1019 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1020 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1021 1022 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1023 # If this is True and '(' is not found, the keyword will be treated as an identifier 1024 VALUES_FOLLOWED_BY_PAREN = True 1025 1026 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1027 SUPPORTS_IMPLICIT_UNNEST = False 1028 1029 __slots__ = ( 1030 "error_level", 1031 "error_message_context", 1032 "max_errors", 1033 "dialect", 1034 "sql", 1035 "errors", 1036 "_tokens", 1037 "_index", 1038 "_curr", 1039 "_next", 1040 "_prev", 1041 "_prev_comments", 1042 ) 1043 1044 # Autofilled 1045 SHOW_TRIE: t.Dict = {} 1046 SET_TRIE: t.Dict = {} 1047 1048 def __init__( 1049 self, 1050 error_level: t.Optional[ErrorLevel] = None, 1051 error_message_context: int = 100, 1052 max_errors: int = 3, 1053 dialect: DialectType = None, 1054 ): 1055 from sqlglot.dialects import Dialect 1056 1057 self.error_level = error_level or ErrorLevel.IMMEDIATE 1058 self.error_message_context = error_message_context 1059 self.max_errors = max_errors 1060 self.dialect = Dialect.get_or_raise(dialect) 1061 self.reset() 1062 1063 def reset(self): 1064 self.sql = "" 1065 self.errors = [] 1066 self._tokens = [] 1067 self._index = 0 1068 self._curr = None 1069 self._next = None 1070 self._prev = None 1071 self._prev_comments = None 1072 1073 def parse( 1074 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1075 ) -> t.List[t.Optional[exp.Expression]]: 1076 """ 1077 Parses a list of tokens and returns a list of syntax trees, one tree 1078 per parsed SQL statement. 1079 1080 Args: 1081 raw_tokens: The list of tokens. 1082 sql: The original SQL string, used to produce helpful debug messages. 1083 1084 Returns: 1085 The list of the produced syntax trees. 
1086 """ 1087 return self._parse( 1088 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1089 ) 1090 1091 def parse_into( 1092 self, 1093 expression_types: exp.IntoType, 1094 raw_tokens: t.List[Token], 1095 sql: t.Optional[str] = None, 1096 ) -> t.List[t.Optional[exp.Expression]]: 1097 """ 1098 Parses a list of tokens into a given Expression type. If a collection of Expression 1099 types is given instead, this method will try to parse the token list into each one 1100 of them, stopping at the first for which the parsing succeeds. 1101 1102 Args: 1103 expression_types: The expression type(s) to try and parse the token list into. 1104 raw_tokens: The list of tokens. 1105 sql: The original SQL string, used to produce helpful debug messages. 1106 1107 Returns: 1108 The target Expression. 1109 """ 1110 errors = [] 1111 for expression_type in ensure_list(expression_types): 1112 parser = self.EXPRESSION_PARSERS.get(expression_type) 1113 if not parser: 1114 raise TypeError(f"No parser registered for {expression_type}") 1115 1116 try: 1117 return self._parse(parser, raw_tokens, sql) 1118 except ParseError as e: 1119 e.errors[0]["into_expression"] = expression_type 1120 errors.append(e) 1121 1122 raise ParseError( 1123 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1124 errors=merge_errors(errors), 1125 ) from errors[-1] 1126 1127 def _parse( 1128 self, 1129 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1130 raw_tokens: t.List[Token], 1131 sql: t.Optional[str] = None, 1132 ) -> t.List[t.Optional[exp.Expression]]: 1133 self.reset() 1134 self.sql = sql or "" 1135 1136 total = len(raw_tokens) 1137 chunks: t.List[t.List[Token]] = [[]] 1138 1139 for i, token in enumerate(raw_tokens): 1140 if token.token_type == TokenType.SEMICOLON: 1141 if i < total - 1: 1142 chunks.append([]) 1143 else: 1144 chunks[-1].append(token) 1145 1146 expressions = [] 1147 1148 for tokens in chunks: 1149 self._index = -1 1150 self._tokens = tokens 1151 self._advance() 1152 1153 expressions.append(parse_method(self)) 1154 1155 if self._index < len(self._tokens): 1156 self.raise_error("Invalid expression / Unexpected token") 1157 1158 self.check_errors() 1159 1160 return expressions 1161 1162 def check_errors(self) -> None: 1163 """Logs or raises any found errors, depending on the chosen error level setting.""" 1164 if self.error_level == ErrorLevel.WARN: 1165 for error in self.errors: 1166 logger.error(str(error)) 1167 elif self.error_level == ErrorLevel.RAISE and self.errors: 1168 raise ParseError( 1169 concat_messages(self.errors, self.max_errors), 1170 errors=merge_errors(self.errors), 1171 ) 1172 1173 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1174 """ 1175 Appends an error in the list of recorded errors or raises it, depending on the chosen 1176 error level setting. 1177 """ 1178 token = token or self._curr or self._prev or Token.string("") 1179 start = token.start 1180 end = token.end + 1 1181 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1182 highlight = self.sql[start:end] 1183 end_context = self.sql[end : end + self.error_message_context] 1184 1185 error = ParseError.new( 1186 f"{message}. 
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one currently being processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
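
    # Illustrative check (not from the upstream module): _parse_comment above handles the
    # standard COMMENT ON form:
    #
    #     >>> import sqlglot
    #     >>> node = sqlglot.parse_one("COMMENT ON TABLE db.t IS 'fact table'")
    #     >>> node.args["kind"]  # expected: 'TABLE'
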
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            ),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
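
    # Illustrative run through the DROP path above (not from the upstream module):
    #
    #     >>> import sqlglot
    #     >>> drop = sqlglot.parse_one("DROP TABLE IF EXISTS s.t CASCADE")
    #     >>> drop.args["exists"], drop.args["cascade"]  # expected: (True, True)
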
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )
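
    # Illustrative sketch (not from the upstream module): _parse_create above funnels every
    # CREATE variant into a single exp.Create node whose pieces land in named args:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> create = sqlglot.parse_one("CREATE OR REPLACE TABLE t AS SELECT 1 AS x")
    #     >>> create.args["replace"], create.args["kind"]  # expected: (True, 'TABLE')
    #     >>> isinstance(create.expression, exp.Select)    # the AS SELECT body
    #     True
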
    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )
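
    # Illustrative sketch (not from the upstream module): _parse_clustered_by above covers
    # Hive-style bucketing, assuming the Hive dialect routes CLUSTERED BY through it:
    #
    #     >>> import sqlglot
    #     >>> sql = "CREATE TABLE t (a INT) CLUSTERED BY (a) SORTED BY (a) INTO 32 BUCKETS"
    #     >>> props = sqlglot.parse_one(sql, read="hive").args["properties"]
    #     >>> [p.__class__.__name__ for p in props.expressions]
    #     # expected to include 'ClusteredByProperty'
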
    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )
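
    # Illustrative sketch (not from the upstream module): _parse_partitioned_of and
    # _parse_partition_bound_spec above implement Postgres declarative partitioning:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sql = "CREATE TABLE p PARTITION OF t FOR VALUES FROM (1) TO (10)"
    #     >>> create = sqlglot.parse_one(sql, read="postgres")
    #     >>> create.find(exp.PartitionBoundSpec) is not None  # expected: True
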
    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        extended = self._match_text_seq("EXTENDED")
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )
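
    # Illustrative run through _parse_insert above (not from the upstream module):
    #
    #     >>> import sqlglot
    #     >>> sql = "INSERT INTO t (a) SELECT a FROM s RETURNING a"
    #     >>> ins = sqlglot.parse_one(sql, read="postgres")
    #     >>> ins.args["returning"] is not None  # expected: True
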
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
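
    # Illustrative run through _parse_on_conflict above (not from the upstream module):
    #
    #     >>> import sqlglot
    #     >>> sql = "INSERT INTO t VALUES (1) ON CONFLICT (id) DO NOTHING"
    #     >>> conflict = sqlglot.parse_one(sql, read="postgres").args["conflict"]
    #     >>> conflict.args["nothing"]  # expected: True
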
    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2, which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # DuckDB supports a leading FROM clause, e.g. FROM x SELECT ...
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this
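
    # Illustrative sketch (not from the upstream module): _parse_select plus
    # _parse_query_modifiers (below) attach each clause to a named arg on the Select node:
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one("SELECT x FROM t WHERE x > 0 ORDER BY x LIMIT 10")
    #     >>> sorted(k for k in ("where", "order", "limit") if q.args.get(k))
    #     ['limit', 'order', 'where']
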
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this
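
    # Illustrative sketch (not from the upstream module): CTEs parsed by _parse_with and
    # _parse_cte above end up under the "with" arg of the enclosing statement:
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
    #     >>> [cte.alias for cte in q.args["with"].expressions]  # expected: ['c']
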
None) 2519 offset.set("expressions", limit_by_expressions) 2520 continue 2521 break 2522 2523 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2524 this = self._implicit_unnests_to_explicit(this) 2525 2526 return this 2527 2528 def _parse_hint(self) -> t.Optional[exp.Hint]: 2529 if self._match(TokenType.HINT): 2530 hints = [] 2531 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2532 hints.extend(hint) 2533 2534 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2535 self.raise_error("Expected */ after HINT") 2536 2537 return self.expression(exp.Hint, expressions=hints) 2538 2539 return None 2540 2541 def _parse_into(self) -> t.Optional[exp.Into]: 2542 if not self._match(TokenType.INTO): 2543 return None 2544 2545 temp = self._match(TokenType.TEMPORARY) 2546 unlogged = self._match_text_seq("UNLOGGED") 2547 self._match(TokenType.TABLE) 2548 2549 return self.expression( 2550 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2551 ) 2552 2553 def _parse_from( 2554 self, joins: bool = False, skip_from_token: bool = False 2555 ) -> t.Optional[exp.From]: 2556 if not skip_from_token and not self._match(TokenType.FROM): 2557 return None 2558 2559 return self.expression( 2560 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2561 ) 2562 2563 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2564 if not self._match(TokenType.MATCH_RECOGNIZE): 2565 return None 2566 2567 self._match_l_paren() 2568 2569 partition = self._parse_partition_by() 2570 order = self._parse_order() 2571 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2572 2573 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2574 rows = exp.var("ONE ROW PER MATCH") 2575 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2576 text = "ALL ROWS PER MATCH" 2577 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2578 text += " SHOW EMPTY MATCHES" 2579 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2580 text += " OMIT EMPTY MATCHES" 2581 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2582 text += " WITH UNMATCHED ROWS" 2583 rows = exp.var(text) 2584 else: 2585 rows = None 2586 2587 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2588 text = "AFTER MATCH SKIP" 2589 if self._match_text_seq("PAST", "LAST", "ROW"): 2590 text += " PAST LAST ROW" 2591 elif self._match_text_seq("TO", "NEXT", "ROW"): 2592 text += " TO NEXT ROW" 2593 elif self._match_text_seq("TO", "FIRST"): 2594 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2595 elif self._match_text_seq("TO", "LAST"): 2596 text += f" TO LAST {self._advance_any().text}" # type: ignore 2597 after = exp.var(text) 2598 else: 2599 after = None 2600 2601 if self._match_text_seq("PATTERN"): 2602 self._match_l_paren() 2603 2604 if not self._curr: 2605 self.raise_error("Expecting )", self._curr) 2606 2607 paren = 1 2608 start = self._curr 2609 2610 while self._curr and paren > 0: 2611 if self._curr.token_type == TokenType.L_PAREN: 2612 paren += 1 2613 if self._curr.token_type == TokenType.R_PAREN: 2614 paren -= 1 2615 2616 end = self._prev 2617 self._advance() 2618 2619 if paren > 0: 2620 self.raise_error("Expecting )", self._curr) 2621 2622 pattern = exp.var(self._find_sql(start, end)) 2623 else: 2624 pattern = None 2625 2626 define = ( 2627 self._parse_csv(self._parse_name_as_expression) 2628 if self._match_text_seq("DEFINE") 2629 else None 2630 ) 2631 2632 self._match_r_paren() 2633 2634 return 
self.expression( 2635 exp.MatchRecognize, 2636 partition_by=partition, 2637 order=order, 2638 measures=measures, 2639 rows=rows, 2640 after=after, 2641 pattern=pattern, 2642 define=define, 2643 alias=self._parse_table_alias(), 2644 ) 2645 2646 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2647 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2648 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2649 cross_apply = False 2650 2651 if cross_apply is not None: 2652 this = self._parse_select(table=True) 2653 view = None 2654 outer = None 2655 elif self._match(TokenType.LATERAL): 2656 this = self._parse_select(table=True) 2657 view = self._match(TokenType.VIEW) 2658 outer = self._match(TokenType.OUTER) 2659 else: 2660 return None 2661 2662 if not this: 2663 this = ( 2664 self._parse_unnest() 2665 or self._parse_function() 2666 or self._parse_id_var(any_token=False) 2667 ) 2668 2669 while self._match(TokenType.DOT): 2670 this = exp.Dot( 2671 this=this, 2672 expression=self._parse_function() or self._parse_id_var(any_token=False), 2673 ) 2674 2675 if view: 2676 table = self._parse_id_var(any_token=False) 2677 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2678 table_alias: t.Optional[exp.TableAlias] = self.expression( 2679 exp.TableAlias, this=table, columns=columns 2680 ) 2681 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2682 # We move the alias from the lateral's child node to the lateral itself 2683 table_alias = this.args["alias"].pop() 2684 else: 2685 table_alias = self._parse_table_alias() 2686 2687 return self.expression( 2688 exp.Lateral, 2689 this=this, 2690 view=view, 2691 outer=outer, 2692 alias=table_alias, 2693 cross_apply=cross_apply, 2694 ) 2695 2696 def _parse_join_parts( 2697 self, 2698 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2699 return ( 2700 self._match_set(self.JOIN_METHODS) and self._prev, 2701 self._match_set(self.JOIN_SIDES) and self._prev, 2702 self._match_set(self.JOIN_KINDS) and self._prev, 2703 ) 2704 2705 def _parse_join( 2706 self, skip_join_token: bool = False, parse_bracket: bool = False 2707 ) -> t.Optional[exp.Join]: 2708 if self._match(TokenType.COMMA): 2709 return self.expression(exp.Join, this=self._parse_table()) 2710 2711 index = self._index 2712 method, side, kind = self._parse_join_parts() 2713 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2714 join = self._match(TokenType.JOIN) 2715 2716 if not skip_join_token and not join: 2717 self._retreat(index) 2718 kind = None 2719 method = None 2720 side = None 2721 2722 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2723 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2724 2725 if not skip_join_token and not join and not outer_apply and not cross_apply: 2726 return None 2727 2728 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2729 2730 if method: 2731 kwargs["method"] = method.text 2732 if side: 2733 kwargs["side"] = side.text 2734 if kind: 2735 kwargs["kind"] = kind.text 2736 if hint: 2737 kwargs["hint"] = hint 2738 2739 if self._match(TokenType.ON): 2740 kwargs["on"] = self._parse_conjunction() 2741 elif self._match(TokenType.USING): 2742 kwargs["using"] = self._parse_wrapped_id_vars() 2743 elif not (kind and kind.token_type == TokenType.CROSS): 2744 index = self._index 2745 join = self._parse_join() 2746 2747 if join and self._match(TokenType.ON): 2748 
kwargs["on"] = self._parse_conjunction() 2749 elif join and self._match(TokenType.USING): 2750 kwargs["using"] = self._parse_wrapped_id_vars() 2751 else: 2752 join = None 2753 self._retreat(index) 2754 2755 kwargs["this"].set("joins", [join] if join else None) 2756 2757 comments = [c for token in (method, side, kind) if token for c in token.comments] 2758 return self.expression(exp.Join, comments=comments, **kwargs) 2759 2760 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2761 this = self._parse_conjunction() 2762 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2763 return this 2764 2765 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2766 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2767 2768 return this 2769 2770 def _parse_index( 2771 self, 2772 index: t.Optional[exp.Expression] = None, 2773 ) -> t.Optional[exp.Index]: 2774 if index: 2775 unique = None 2776 primary = None 2777 amp = None 2778 2779 self._match(TokenType.ON) 2780 self._match(TokenType.TABLE) # hive 2781 table = self._parse_table_parts(schema=True) 2782 else: 2783 unique = self._match(TokenType.UNIQUE) 2784 primary = self._match_text_seq("PRIMARY") 2785 amp = self._match_text_seq("AMP") 2786 2787 if not self._match(TokenType.INDEX): 2788 return None 2789 2790 index = self._parse_id_var() 2791 table = None 2792 2793 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2794 2795 if self._match(TokenType.L_PAREN, advance=False): 2796 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2797 else: 2798 columns = None 2799 2800 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2801 2802 return self.expression( 2803 exp.Index, 2804 this=index, 2805 table=table, 2806 using=using, 2807 columns=columns, 2808 unique=unique, 2809 primary=primary, 2810 amp=amp, 2811 include=include, 2812 partition_by=self._parse_partition_by(), 2813 where=self._parse_where(), 2814 ) 2815 2816 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2817 hints: t.List[exp.Expression] = [] 2818 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2819 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2820 hints.append( 2821 self.expression( 2822 exp.WithTableHint, 2823 expressions=self._parse_csv( 2824 lambda: self._parse_function() or self._parse_var(any_token=True) 2825 ), 2826 ) 2827 ) 2828 self._match_r_paren() 2829 else: 2830 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2831 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2832 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2833 2834 self._match_texts(("INDEX", "KEY")) 2835 if self._match(TokenType.FOR): 2836 hint.set("target", self._advance_any() and self._prev.text.upper()) 2837 2838 hint.set("expressions", self._parse_wrapped_id_vars()) 2839 hints.append(hint) 2840 2841 return hints or None 2842 2843 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2844 return ( 2845 (not schema and self._parse_function(optional_parens=False)) 2846 or self._parse_id_var(any_token=False) 2847 or self._parse_string_as_identifier() 2848 or self._parse_placeholder() 2849 ) 2850 2851 def _parse_table_parts( 2852 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 2853 ) -> exp.Table: 2854 catalog = None 2855 db = None 2856 table: t.Optional[exp.Expression | str] = 

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
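
    # Usage sketch (editor's addition, not part of the upstream source; assumes the
    # public sqlglot API and the BigQuery dialect, whose UNNEST aliases are
    # column-only):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     u = sqlglot.parse_one(
    #         "SELECT * FROM UNNEST([1, 2]) AS x WITH OFFSET AS pos", read="bigquery"
    #     ).find(exp.Unnest)
    #     # u.args["offset"] is the identifier "pos"; without "AS pos" it would
    #     # default to the identifier "offset"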

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)
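
    # Usage sketch (editor's addition, not part of the upstream source; assumes the
    # public sqlglot API; the exact arguments populated depend on dialect settings
    # such as TABLESAMPLE_CSV and TABLESAMPLE_SIZE_IS_PERCENT):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ts = sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (10 PERCENT)").find(
    #         exp.TableSample
    #     )
    #     # ts.args["percent"] should hold the literal 10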

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()
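
    # Usage sketch (editor's addition, not part of the upstream source; assumes the
    # public sqlglot API): ROLLUP, CUBE and GROUPING SETS each land in their own
    # arg of the exp.Group node:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     g = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)").find(exp.Group)
    #     # g.args["rollup"] is a non-empty list of columns, while plain
    #     # "GROUP BY a" would populate g.args["expressions"] instead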

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        # An ASC token is consumed but leaves desc as False, the default direction
        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
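
    # Usage sketch (editor's addition, not part of the upstream source; assumes the
    # public sqlglot API): an explicit NULLS FIRST/LAST always wins over the
    # dialect's default null ordering:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     o = sqlglot.parse_one("SELECT 1 ORDER BY x NULLS LAST").find(exp.Ordered)
    #     # o.args["nulls_first"] is False here; with no explicit clause the value
    #     # is derived from dialect.NULL_ORDERING as implemented above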

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
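
    # Usage sketch (editor's addition, not part of the upstream source; assumes the
    # public sqlglot API): the MySQL "LIMIT <offset>, <count>" form is captured on
    # the exp.Limit node and can be transpiled into the explicit OFFSET form
    # (output shown approximately):
    #
    #     import sqlglot
    #
    #     print(sqlglot.transpile("SELECT * FROM t LIMIT 5, 10", read="mysql", write="postgres")[0])
    #     # roughly: SELECT * FROM t LIMIT 10 OFFSET 5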

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this
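
    # Usage sketch (editor's addition, not part of the upstream source; assumes the
    # public sqlglot API): a bare UNION is distinct unless ALL is given, and
    # UNION/EXCEPT/INTERSECT map to exp.Union/exp.Except/exp.Intersect:
    #
    #     import sqlglot
    #
    #     union = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
    #     # union.args["distinct"] is True; "UNION ALL" would make it False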
self.raise_error("Expecting ]") 3620 else: 3621 this = self.expression(exp.In, this=this, field=self._parse_field()) 3622 3623 return this 3624 3625 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3626 low = self._parse_bitwise() 3627 self._match(TokenType.AND) 3628 high = self._parse_bitwise() 3629 return self.expression(exp.Between, this=this, low=low, high=high) 3630 3631 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3632 if not self._match(TokenType.ESCAPE): 3633 return this 3634 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3635 3636 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3637 index = self._index 3638 3639 if not self._match(TokenType.INTERVAL) and match_interval: 3640 return None 3641 3642 if self._match(TokenType.STRING, advance=False): 3643 this = self._parse_primary() 3644 else: 3645 this = self._parse_term() 3646 3647 if not this or ( 3648 isinstance(this, exp.Column) 3649 and not this.table 3650 and not this.this.quoted 3651 and this.name.upper() == "IS" 3652 ): 3653 self._retreat(index) 3654 return None 3655 3656 unit = self._parse_function() or ( 3657 not self._match(TokenType.ALIAS, advance=False) 3658 and self._parse_var(any_token=True, upper=True) 3659 ) 3660 3661 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3662 # each INTERVAL expression into this canonical form so it's easy to transpile 3663 if this and this.is_number: 3664 this = exp.Literal.string(this.name) 3665 elif this and this.is_string: 3666 parts = this.name.split() 3667 3668 if len(parts) == 2: 3669 if unit: 3670 # This is not actually a unit, it's something else (e.g. a "window side") 3671 unit = None 3672 self._retreat(self._index - 1) 3673 3674 this = exp.Literal.string(parts[0]) 3675 unit = self.expression(exp.Var, this=parts[1].upper()) 3676 3677 return self.expression(exp.Interval, this=this, unit=unit) 3678 3679 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3680 this = self._parse_term() 3681 3682 while True: 3683 if self._match_set(self.BITWISE): 3684 this = self.expression( 3685 self.BITWISE[self._prev.token_type], 3686 this=this, 3687 expression=self._parse_term(), 3688 ) 3689 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3690 this = self.expression( 3691 exp.DPipe, 3692 this=this, 3693 expression=self._parse_term(), 3694 safe=not self.dialect.STRICT_STRING_CONCAT, 3695 ) 3696 elif self._match(TokenType.DQMARK): 3697 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3698 elif self._match_pair(TokenType.LT, TokenType.LT): 3699 this = self.expression( 3700 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3701 ) 3702 elif self._match_pair(TokenType.GT, TokenType.GT): 3703 this = self.expression( 3704 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3705 ) 3706 else: 3707 break 3708 3709 return this 3710 3711 def _parse_term(self) -> t.Optional[exp.Expression]: 3712 return self._parse_tokens(self._parse_factor, self.TERM) 3713 3714 def _parse_factor(self) -> t.Optional[exp.Expression]: 3715 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3716 this = parse_method() 3717 3718 while self._match_set(self.FACTOR): 3719 this = self.expression( 3720 self.FACTOR[self._prev.token_type], 3721 this=this, 3722 comments=self._prev_comments, 3723 expression=parse_method(), 3724 ) 3725 if isinstance(this, exp.Div): 
3726 this.args["typed"] = self.dialect.TYPED_DIVISION 3727 this.args["safe"] = self.dialect.SAFE_DIVISION 3728 3729 return this 3730 3731 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3732 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3733 3734 def _parse_unary(self) -> t.Optional[exp.Expression]: 3735 if self._match_set(self.UNARY_PARSERS): 3736 return self.UNARY_PARSERS[self._prev.token_type](self) 3737 return self._parse_at_time_zone(self._parse_type()) 3738 3739 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3740 interval = parse_interval and self._parse_interval() 3741 if interval: 3742 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 3743 while True: 3744 index = self._index 3745 self._match(TokenType.PLUS) 3746 3747 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3748 self._retreat(index) 3749 break 3750 3751 interval = self.expression( # type: ignore 3752 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3753 ) 3754 3755 return interval 3756 3757 index = self._index 3758 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3759 this = self._parse_column() 3760 3761 if data_type: 3762 if isinstance(this, exp.Literal): 3763 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3764 if parser: 3765 return parser(self, this, data_type) 3766 return self.expression(exp.Cast, this=this, to=data_type) 3767 if not data_type.expressions: 3768 self._retreat(index) 3769 return self._parse_column() 3770 return self._parse_column_ops(data_type) 3771 3772 return this and self._parse_column_ops(this) 3773 3774 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3775 this = self._parse_type() 3776 if not this: 3777 return None 3778 3779 return self.expression( 3780 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3781 ) 3782 3783 def _parse_types( 3784 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3785 ) -> t.Optional[exp.Expression]: 3786 index = self._index 3787 3788 prefix = self._match_text_seq("SYSUDTLIB", ".") 3789 3790 if not self._match_set(self.TYPE_TOKENS): 3791 identifier = allow_identifiers and self._parse_id_var( 3792 any_token=False, tokens=(TokenType.VAR,) 3793 ) 3794 if identifier: 3795 tokens = self.dialect.tokenize(identifier.name) 3796 3797 if len(tokens) != 1: 3798 self.raise_error("Unexpected identifier", self._prev) 3799 3800 if tokens[0].token_type in self.TYPE_TOKENS: 3801 self._prev = tokens[0] 3802 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3803 type_name = identifier.name 3804 3805 while self._match(TokenType.DOT): 3806 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3807 3808 return exp.DataType.build(type_name, udt=True) 3809 else: 3810 self._retreat(self._index - 1) 3811 return None 3812 else: 3813 return None 3814 3815 type_token = self._prev.token_type 3816 3817 if type_token == TokenType.PSEUDO_TYPE: 3818 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3819 3820 if type_token == TokenType.OBJECT_IDENTIFIER: 3821 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3822 3823 nested = type_token in self.NESTED_TYPE_TOKENS 3824 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3825 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3826 expressions = None 3827 maybe_func = False 3828 3829 if self._match(TokenType.L_PAREN): 3830 if is_struct: 3831 

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
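
    # Usage sketch (editor's addition, not part of the upstream source; assumes the
    # public sqlglot API): nested types parsed here can also be built directly and
    # rendered per dialect (output shown approximately):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     dt = exp.DataType.build("ARRAY<STRUCT<a INT>>", dialect="bigquery")
    #     print(dt.sql(dialect="duckdb"))  # e.g. STRUCT(a INT)[]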

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
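
    # Usage sketch (editor's addition, not part of the upstream source; assumes the
    # public sqlglot API): the ODBC-style {fn ...} escape mentioned above is
    # unwrapped transparently by _parse_function (output shown approximately):
    #
    #     import sqlglot
    #
    #     print(sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}", read="mysql").sql())
    #     # roughly: SELECT CONCAT('a', 'b')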

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
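
    # Usage sketch (editor's addition, not part of the upstream source; assumes the
    # public sqlglot API): column definitions parsed by _parse_column_def carry the
    # type in "kind" and any constraints in "constraints":
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     col = sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL)").find(exp.ColumnDef)
    #     # col.args["kind"] is the INT DataType; col.args["constraints"] holds the
    #     # NOT NULL column constraint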

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
4531 self.raise_error("Expected DELETE or UPDATE") 4532 4533 kind = self._prev.text.lower() 4534 4535 if self._match_text_seq("NO", "ACTION"): 4536 action = "NO ACTION" 4537 elif self._match(TokenType.SET): 4538 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4539 action = "SET " + self._prev.text.upper() 4540 else: 4541 self._advance() 4542 action = self._prev.text.upper() 4543 4544 options[kind] = action 4545 4546 return self.expression( 4547 exp.ForeignKey, 4548 expressions=expressions, 4549 reference=reference, 4550 **options, # type: ignore 4551 ) 4552 4553 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4554 return self._parse_field() 4555 4556 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4557 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4558 self._retreat(self._index - 1) 4559 return None 4560 4561 id_vars = self._parse_wrapped_id_vars() 4562 return self.expression( 4563 exp.PeriodForSystemTimeConstraint, 4564 this=seq_get(id_vars, 0), 4565 expression=seq_get(id_vars, 1), 4566 ) 4567 4568 def _parse_primary_key( 4569 self, wrapped_optional: bool = False, in_props: bool = False 4570 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4571 desc = ( 4572 self._match_set((TokenType.ASC, TokenType.DESC)) 4573 and self._prev.token_type == TokenType.DESC 4574 ) 4575 4576 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4577 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4578 4579 expressions = self._parse_wrapped_csv( 4580 self._parse_primary_key_part, optional=wrapped_optional 4581 ) 4582 options = self._parse_key_constraint_options() 4583 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4584 4585 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4586 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4587 4588 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4589 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4590 return this 4591 4592 bracket_kind = self._prev.token_type 4593 expressions = self._parse_csv( 4594 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4595 ) 4596 4597 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4598 self.raise_error("Expected ]") 4599 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4600 self.raise_error("Expected }") 4601 4602 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4603 if bracket_kind == TokenType.L_BRACE: 4604 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4605 elif not this or this.name.upper() == "ARRAY": 4606 this = self.expression(exp.Array, expressions=expressions) 4607 else: 4608 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4609 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4610 4611 self._add_comments(this) 4612 return self._parse_bracket(this) 4613 4614 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4615 if self._match(TokenType.COLON): 4616 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4617 return this 4618 4619 def _parse_case(self) -> t.Optional[exp.Expression]: 4620 ifs = [] 4621 default = None 4622 4623 comments = self._prev_comments 4624 expression = 
self._parse_conjunction() 4625 4626 while self._match(TokenType.WHEN): 4627 this = self._parse_conjunction() 4628 self._match(TokenType.THEN) 4629 then = self._parse_conjunction() 4630 ifs.append(self.expression(exp.If, this=this, true=then)) 4631 4632 if self._match(TokenType.ELSE): 4633 default = self._parse_conjunction() 4634 4635 if not self._match(TokenType.END): 4636 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4637 default = exp.column("interval") 4638 else: 4639 self.raise_error("Expected END after CASE", self._prev) 4640 4641 return self._parse_window( 4642 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4643 ) 4644 4645 def _parse_if(self) -> t.Optional[exp.Expression]: 4646 if self._match(TokenType.L_PAREN): 4647 args = self._parse_csv(self._parse_conjunction) 4648 this = self.validate_expression(exp.If.from_arg_list(args), args) 4649 self._match_r_paren() 4650 else: 4651 index = self._index - 1 4652 4653 if self.NO_PAREN_IF_COMMANDS and index == 0: 4654 return self._parse_as_command(self._prev) 4655 4656 condition = self._parse_conjunction() 4657 4658 if not condition: 4659 self._retreat(index) 4660 return None 4661 4662 self._match(TokenType.THEN) 4663 true = self._parse_conjunction() 4664 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4665 self._match(TokenType.END) 4666 this = self.expression(exp.If, this=condition, true=true, false=false) 4667 4668 return self._parse_window(this) 4669 4670 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4671 if not self._match_text_seq("VALUE", "FOR"): 4672 self._retreat(self._index - 1) 4673 return None 4674 4675 return self.expression( 4676 exp.NextValueFor, 4677 this=self._parse_column(), 4678 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4679 ) 4680 4681 def _parse_extract(self) -> exp.Extract: 4682 this = self._parse_function() or self._parse_var() or self._parse_type() 4683 4684 if self._match(TokenType.FROM): 4685 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4686 4687 if not self._match(TokenType.COMMA): 4688 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4689 4690 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4691 4692 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4693 this = self._parse_conjunction() 4694 4695 if not self._match(TokenType.ALIAS): 4696 if self._match(TokenType.COMMA): 4697 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4698 4699 self.raise_error("Expected AS after CAST") 4700 4701 fmt = None 4702 to = self._parse_types() 4703 4704 if self._match(TokenType.FORMAT): 4705 fmt_string = self._parse_string() 4706 fmt = self._parse_at_time_zone(fmt_string) 4707 4708 if not to: 4709 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4710 if to.this in exp.DataType.TEMPORAL_TYPES: 4711 this = self.expression( 4712 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4713 this=this, 4714 format=exp.Literal.string( 4715 format_time( 4716 fmt_string.this if fmt_string else "", 4717 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4718 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4719 ) 4720 ), 4721 ) 4722 4723 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4724 this.set("zone", fmt.args["zone"]) 4725 return this 4726 elif not to: 4727 self.raise_error("Expected 
TYPE after CAST") 4728 elif isinstance(to, exp.Identifier): 4729 to = exp.DataType.build(to.name, udt=True) 4730 elif to.this == exp.DataType.Type.CHAR: 4731 if self._match(TokenType.CHARACTER_SET): 4732 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4733 4734 return self.expression( 4735 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4736 ) 4737 4738 def _parse_string_agg(self) -> exp.Expression: 4739 if self._match(TokenType.DISTINCT): 4740 args: t.List[t.Optional[exp.Expression]] = [ 4741 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4742 ] 4743 if self._match(TokenType.COMMA): 4744 args.extend(self._parse_csv(self._parse_conjunction)) 4745 else: 4746 args = self._parse_csv(self._parse_conjunction) # type: ignore 4747 4748 index = self._index 4749 if not self._match(TokenType.R_PAREN) and args: 4750 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4751 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4752 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4753 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4754 4755 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4756 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4757 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4758 if not self._match_text_seq("WITHIN", "GROUP"): 4759 self._retreat(index) 4760 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4761 4762 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4763 order = self._parse_order(this=seq_get(args, 0)) 4764 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4765 4766 def _parse_convert( 4767 self, strict: bool, safe: t.Optional[bool] = None 4768 ) -> t.Optional[exp.Expression]: 4769 this = self._parse_bitwise() 4770 4771 if self._match(TokenType.USING): 4772 to: t.Optional[exp.Expression] = self.expression( 4773 exp.CharacterSet, this=self._parse_var() 4774 ) 4775 elif self._match(TokenType.COMMA): 4776 to = self._parse_types() 4777 else: 4778 to = None 4779 4780 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4781 4782 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4783 """ 4784 There are generally two variants of the DECODE function: 4785 4786 - DECODE(bin, charset) 4787 - DECODE(expression, search, result [, search, result] ... [, default]) 4788 4789 The second variant will always be parsed into a CASE expression. Note that NULL 4790 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4791 instead of relying on pattern matching. 
4792 """ 4793 args = self._parse_csv(self._parse_conjunction) 4794 4795 if len(args) < 3: 4796 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4797 4798 expression, *expressions = args 4799 if not expression: 4800 return None 4801 4802 ifs = [] 4803 for search, result in zip(expressions[::2], expressions[1::2]): 4804 if not search or not result: 4805 return None 4806 4807 if isinstance(search, exp.Literal): 4808 ifs.append( 4809 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4810 ) 4811 elif isinstance(search, exp.Null): 4812 ifs.append( 4813 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4814 ) 4815 else: 4816 cond = exp.or_( 4817 exp.EQ(this=expression.copy(), expression=search), 4818 exp.and_( 4819 exp.Is(this=expression.copy(), expression=exp.Null()), 4820 exp.Is(this=search.copy(), expression=exp.Null()), 4821 copy=False, 4822 ), 4823 copy=False, 4824 ) 4825 ifs.append(exp.If(this=cond, true=result)) 4826 4827 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4828 4829 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4830 self._match_text_seq("KEY") 4831 key = self._parse_column() 4832 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4833 self._match_text_seq("VALUE") 4834 value = self._parse_bitwise() 4835 4836 if not key and not value: 4837 return None 4838 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4839 4840 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4841 if not this or not self._match_text_seq("FORMAT", "JSON"): 4842 return this 4843 4844 return self.expression(exp.FormatJson, this=this) 4845 4846 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4847 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4848 for value in values: 4849 if self._match_text_seq(value, "ON", on): 4850 return f"{value} ON {on}" 4851 4852 return None 4853 4854 @t.overload 4855 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 4856 4857 @t.overload 4858 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
4859 4860 def _parse_json_object(self, agg=False): 4861 star = self._parse_star() 4862 expressions = ( 4863 [star] 4864 if star 4865 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4866 ) 4867 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4868 4869 unique_keys = None 4870 if self._match_text_seq("WITH", "UNIQUE"): 4871 unique_keys = True 4872 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4873 unique_keys = False 4874 4875 self._match_text_seq("KEYS") 4876 4877 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4878 self._parse_type() 4879 ) 4880 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4881 4882 return self.expression( 4883 exp.JSONObjectAgg if agg else exp.JSONObject, 4884 expressions=expressions, 4885 null_handling=null_handling, 4886 unique_keys=unique_keys, 4887 return_type=return_type, 4888 encoding=encoding, 4889 ) 4890 4891 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4892 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4893 if not self._match_text_seq("NESTED"): 4894 this = self._parse_id_var() 4895 kind = self._parse_types(allow_identifiers=False) 4896 nested = None 4897 else: 4898 this = None 4899 kind = None 4900 nested = True 4901 4902 path = self._match_text_seq("PATH") and self._parse_string() 4903 nested_schema = nested and self._parse_json_schema() 4904 4905 return self.expression( 4906 exp.JSONColumnDef, 4907 this=this, 4908 kind=kind, 4909 path=path, 4910 nested_schema=nested_schema, 4911 ) 4912 4913 def _parse_json_schema(self) -> exp.JSONSchema: 4914 self._match_text_seq("COLUMNS") 4915 return self.expression( 4916 exp.JSONSchema, 4917 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4918 ) 4919 4920 def _parse_json_table(self) -> exp.JSONTable: 4921 this = self._parse_format_json(self._parse_bitwise()) 4922 path = self._match(TokenType.COMMA) and self._parse_string() 4923 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4924 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4925 schema = self._parse_json_schema() 4926 4927 return exp.JSONTable( 4928 this=this, 4929 schema=schema, 4930 path=path, 4931 error_handling=error_handling, 4932 empty_handling=empty_handling, 4933 ) 4934 4935 def _parse_match_against(self) -> exp.MatchAgainst: 4936 expressions = self._parse_csv(self._parse_column) 4937 4938 self._match_text_seq(")", "AGAINST", "(") 4939 4940 this = self._parse_string() 4941 4942 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4943 modifier = "IN NATURAL LANGUAGE MODE" 4944 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4945 modifier = f"{modifier} WITH QUERY EXPANSION" 4946 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4947 modifier = "IN BOOLEAN MODE" 4948 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4949 modifier = "WITH QUERY EXPANSION" 4950 else: 4951 modifier = None 4952 4953 return self.expression( 4954 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4955 ) 4956 4957 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4958 def _parse_open_json(self) -> exp.OpenJSON: 4959 this = self._parse_bitwise() 4960 path = self._match(TokenType.COMMA) and self._parse_string() 4961 4962 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4963 this = self._parse_field(any_token=True) 4964 kind = self._parse_types() 4965 path = 
self._parse_string() 4966 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4967 4968 return self.expression( 4969 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4970 ) 4971 4972 expressions = None 4973 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4974 self._match_l_paren() 4975 expressions = self._parse_csv(_parse_open_json_column_def) 4976 4977 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4978 4979 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4980 args = self._parse_csv(self._parse_bitwise) 4981 4982 if self._match(TokenType.IN): 4983 return self.expression( 4984 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4985 ) 4986 4987 if haystack_first: 4988 haystack = seq_get(args, 0) 4989 needle = seq_get(args, 1) 4990 else: 4991 needle = seq_get(args, 0) 4992 haystack = seq_get(args, 1) 4993 4994 return self.expression( 4995 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4996 ) 4997 4998 def _parse_predict(self) -> exp.Predict: 4999 self._match_text_seq("MODEL") 5000 this = self._parse_table() 5001 5002 self._match(TokenType.COMMA) 5003 self._match_text_seq("TABLE") 5004 5005 return self.expression( 5006 exp.Predict, 5007 this=this, 5008 expression=self._parse_table(), 5009 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5010 ) 5011 5012 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5013 args = self._parse_csv(self._parse_table) 5014 return exp.JoinHint(this=func_name.upper(), expressions=args) 5015 5016 def _parse_substring(self) -> exp.Substring: 5017 # Postgres supports the form: substring(string [from int] [for int]) 5018 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5019 5020 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5021 5022 if self._match(TokenType.FROM): 5023 args.append(self._parse_bitwise()) 5024 if self._match(TokenType.FOR): 5025 args.append(self._parse_bitwise()) 5026 5027 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5028 5029 def _parse_trim(self) -> exp.Trim: 5030 # https://www.w3resource.com/sql/character-functions/trim.php 5031 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5032 5033 position = None 5034 collation = None 5035 expression = None 5036 5037 if self._match_texts(self.TRIM_TYPES): 5038 position = self._prev.text.upper() 5039 5040 this = self._parse_bitwise() 5041 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5042 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5043 expression = self._parse_bitwise() 5044 5045 if invert_order: 5046 this, expression = expression, this 5047 5048 if self._match(TokenType.COLLATE): 5049 collation = self._parse_bitwise() 5050 5051 return self.expression( 5052 exp.Trim, this=this, position=position, expression=expression, collation=collation 5053 ) 5054 5055 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5056 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5057 5058 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5059 return self._parse_window(self._parse_id_var(), alias=True) 5060 5061 def _parse_respect_or_ignore_nulls( 5062 self, this: t.Optional[exp.Expression] 5063 ) -> t.Optional[exp.Expression]: 5064 if self._match_text_seq("IGNORE", "NULLS"): 5065 return self.expression(exp.IgnoreNulls, this=this) 5066 if 
self._match_text_seq("RESPECT", "NULLS"): 5067 return self.expression(exp.RespectNulls, this=this) 5068 return this 5069 5070 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5071 if self._match(TokenType.HAVING): 5072 self._match_texts(("MAX", "MIN")) 5073 max = self._prev.text.upper() != "MIN" 5074 return self.expression( 5075 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5076 ) 5077 5078 return this 5079 5080 def _parse_window( 5081 self, this: t.Optional[exp.Expression], alias: bool = False 5082 ) -> t.Optional[exp.Expression]: 5083 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5084 self._match(TokenType.WHERE) 5085 this = self.expression( 5086 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5087 ) 5088 self._match_r_paren() 5089 5090 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5091 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5092 if self._match_text_seq("WITHIN", "GROUP"): 5093 order = self._parse_wrapped(self._parse_order) 5094 this = self.expression(exp.WithinGroup, this=this, expression=order) 5095 5096 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5097 # Some dialects choose to implement and some do not. 5098 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5099 5100 # There is some code above in _parse_lambda that handles 5101 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5102 5103 # The below changes handle 5104 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5105 5106 # Oracle allows both formats 5107 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5108 # and Snowflake chose to do the same for familiarity 5109 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5110 if isinstance(this, exp.AggFunc): 5111 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5112 5113 if ignore_respect and ignore_respect is not this: 5114 ignore_respect.replace(ignore_respect.this) 5115 this = self.expression(ignore_respect.__class__, this=this) 5116 5117 this = self._parse_respect_or_ignore_nulls(this) 5118 5119 # bigquery select from window x AS (partition by ...) 
5120 if alias: 5121 over = None 5122 self._match(TokenType.ALIAS) 5123 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5124 return this 5125 else: 5126 over = self._prev.text.upper() 5127 5128 if not self._match(TokenType.L_PAREN): 5129 return self.expression( 5130 exp.Window, this=this, alias=self._parse_id_var(False), over=over 5131 ) 5132 5133 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5134 5135 first = self._match(TokenType.FIRST) 5136 if self._match_text_seq("LAST"): 5137 first = False 5138 5139 partition, order = self._parse_partition_and_order() 5140 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5141 5142 if kind: 5143 self._match(TokenType.BETWEEN) 5144 start = self._parse_window_spec() 5145 self._match(TokenType.AND) 5146 end = self._parse_window_spec() 5147 5148 spec = self.expression( 5149 exp.WindowSpec, 5150 kind=kind, 5151 start=start["value"], 5152 start_side=start["side"], 5153 end=end["value"], 5154 end_side=end["side"], 5155 ) 5156 else: 5157 spec = None 5158 5159 self._match_r_paren() 5160 5161 window = self.expression( 5162 exp.Window, 5163 this=this, 5164 partition_by=partition, 5165 order=order, 5166 spec=spec, 5167 alias=window_alias, 5168 over=over, 5169 first=first, 5170 ) 5171 5172 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5173 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5174 return self._parse_window(window, alias=alias) 5175 5176 return window 5177 5178 def _parse_partition_and_order( 5179 self, 5180 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5181 return self._parse_partition_by(), self._parse_order() 5182 5183 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5184 self._match(TokenType.BETWEEN) 5185 5186 return { 5187 "value": ( 5188 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5189 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5190 or self._parse_bitwise() 5191 ), 5192 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5193 } 5194 5195 def _parse_alias( 5196 self, this: t.Optional[exp.Expression], explicit: bool = False 5197 ) -> t.Optional[exp.Expression]: 5198 any_token = self._match(TokenType.ALIAS) 5199 comments = self._prev_comments 5200 5201 if explicit and not any_token: 5202 return this 5203 5204 if self._match(TokenType.L_PAREN): 5205 aliases = self.expression( 5206 exp.Aliases, 5207 comments=comments, 5208 this=this, 5209 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5210 ) 5211 self._match_r_paren(aliases) 5212 return aliases 5213 5214 alias = self._parse_id_var(any_token) or ( 5215 self.STRING_ALIASES and self._parse_string_as_identifier() 5216 ) 5217 5218 if alias: 5219 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5220 column = this.this 5221 5222 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5223 if not this.comments and column and column.comments: 5224 this.comments = column.comments 5225 column.comments = None 5226 5227 return this 5228 5229 def _parse_id_var( 5230 self, 5231 any_token: bool = True, 5232 tokens: t.Optional[t.Collection[TokenType]] = None, 5233 ) -> t.Optional[exp.Expression]: 5234 identifier = self._parse_identifier() 5235 5236 if identifier: 5237 return identifier 5238 5239 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5240 quoted = self._prev.token_type == TokenType.STRING 5241 
return exp.Identifier(this=self._prev.text, quoted=quoted) 5242 5243 return None 5244 5245 def _parse_string(self) -> t.Optional[exp.Expression]: 5246 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 5247 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 5248 return self._parse_placeholder() 5249 5250 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5251 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5252 5253 def _parse_number(self) -> t.Optional[exp.Expression]: 5254 if self._match(TokenType.NUMBER): 5255 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 5256 return self._parse_placeholder() 5257 5258 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5259 if self._match(TokenType.IDENTIFIER): 5260 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5261 return self._parse_placeholder() 5262 5263 def _parse_var( 5264 self, 5265 any_token: bool = False, 5266 tokens: t.Optional[t.Collection[TokenType]] = None, 5267 upper: bool = False, 5268 ) -> t.Optional[exp.Expression]: 5269 if ( 5270 (any_token and self._advance_any()) 5271 or self._match(TokenType.VAR) 5272 or (self._match_set(tokens) if tokens else False) 5273 ): 5274 return self.expression( 5275 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5276 ) 5277 return self._parse_placeholder() 5278 5279 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5280 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5281 self._advance() 5282 return self._prev 5283 return None 5284 5285 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5286 return self._parse_var() or self._parse_string() 5287 5288 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5289 return self._parse_primary() or self._parse_var(any_token=True) 5290 5291 def _parse_null(self) -> t.Optional[exp.Expression]: 5292 if self._match_set(self.NULL_TOKENS): 5293 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5294 return self._parse_placeholder() 5295 5296 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5297 if self._match(TokenType.TRUE): 5298 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5299 if self._match(TokenType.FALSE): 5300 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5301 return self._parse_placeholder() 5302 5303 def _parse_star(self) -> t.Optional[exp.Expression]: 5304 if self._match(TokenType.STAR): 5305 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5306 return self._parse_placeholder() 5307 5308 def _parse_parameter(self) -> exp.Parameter: 5309 self._match(TokenType.L_BRACE) 5310 this = self._parse_identifier() or self._parse_primary_or_var() 5311 expression = self._match(TokenType.COLON) and ( 5312 self._parse_identifier() or self._parse_primary_or_var() 5313 ) 5314 self._match(TokenType.R_BRACE) 5315 return self.expression(exp.Parameter, this=this, expression=expression) 5316 5317 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5318 if self._match_set(self.PLACEHOLDER_PARSERS): 5319 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5320 if placeholder: 5321 return placeholder 5322 self._advance(-1) 5323 return None 5324 5325 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5326 if not self._match(TokenType.EXCEPT): 5327 return None 5328 if self._match(TokenType.L_PAREN, advance=False): 5329 return 
self._parse_wrapped_csv(self._parse_column) 5330 5331 except_column = self._parse_column() 5332 return [except_column] if except_column else None 5333 5334 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5335 if not self._match(TokenType.REPLACE): 5336 return None 5337 if self._match(TokenType.L_PAREN, advance=False): 5338 return self._parse_wrapped_csv(self._parse_expression) 5339 5340 replace_expression = self._parse_expression() 5341 return [replace_expression] if replace_expression else None 5342 5343 def _parse_csv( 5344 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5345 ) -> t.List[exp.Expression]: 5346 parse_result = parse_method() 5347 items = [parse_result] if parse_result is not None else [] 5348 5349 while self._match(sep): 5350 self._add_comments(parse_result) 5351 parse_result = parse_method() 5352 if parse_result is not None: 5353 items.append(parse_result) 5354 5355 return items 5356 5357 def _parse_tokens( 5358 self, parse_method: t.Callable, expressions: t.Dict 5359 ) -> t.Optional[exp.Expression]: 5360 this = parse_method() 5361 5362 while self._match_set(expressions): 5363 this = self.expression( 5364 expressions[self._prev.token_type], 5365 this=this, 5366 comments=self._prev_comments, 5367 expression=parse_method(), 5368 ) 5369 5370 return this 5371 5372 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5373 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5374 5375 def _parse_wrapped_csv( 5376 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5377 ) -> t.List[exp.Expression]: 5378 return self._parse_wrapped( 5379 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5380 ) 5381 5382 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5383 wrapped = self._match(TokenType.L_PAREN) 5384 if not wrapped and not optional: 5385 self.raise_error("Expecting (") 5386 parse_result = parse_method() 5387 if wrapped: 5388 self._match_r_paren() 5389 return parse_result 5390 5391 def _parse_expressions(self) -> t.List[exp.Expression]: 5392 return self._parse_csv(self._parse_expression) 5393 5394 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5395 return self._parse_select() or self._parse_set_operations( 5396 self._parse_expression() if alias else self._parse_conjunction() 5397 ) 5398 5399 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5400 return self._parse_query_modifiers( 5401 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5402 ) 5403 5404 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5405 this = None 5406 if self._match_texts(self.TRANSACTION_KIND): 5407 this = self._prev.text 5408 5409 self._match_texts(("TRANSACTION", "WORK")) 5410 5411 modes = [] 5412 while True: 5413 mode = [] 5414 while self._match(TokenType.VAR): 5415 mode.append(self._prev.text) 5416 5417 if mode: 5418 modes.append(" ".join(mode)) 5419 if not self._match(TokenType.COMMA): 5420 break 5421 5422 return self.expression(exp.Transaction, this=this, modes=modes) 5423 5424 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5425 chain = None 5426 savepoint = None 5427 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5428 5429 self._match_texts(("TRANSACTION", "WORK")) 5430 5431 if self._match_text_seq("TO"): 5432 self._match_text_seq("SAVEPOINT") 5433 savepoint = self._parse_id_var() 5434 5435 if 
self._match(TokenType.AND): 5436 chain = not self._match_text_seq("NO") 5437 self._match_text_seq("CHAIN") 5438 5439 if is_rollback: 5440 return self.expression(exp.Rollback, savepoint=savepoint) 5441 5442 return self.expression(exp.Commit, chain=chain) 5443 5444 def _parse_refresh(self) -> exp.Refresh: 5445 self._match(TokenType.TABLE) 5446 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5447 5448 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5449 if not self._match_text_seq("ADD"): 5450 return None 5451 5452 self._match(TokenType.COLUMN) 5453 exists_column = self._parse_exists(not_=True) 5454 expression = self._parse_field_def() 5455 5456 if expression: 5457 expression.set("exists", exists_column) 5458 5459 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5460 if self._match_texts(("FIRST", "AFTER")): 5461 position = self._prev.text 5462 column_position = self.expression( 5463 exp.ColumnPosition, this=self._parse_column(), position=position 5464 ) 5465 expression.set("position", column_position) 5466 5467 return expression 5468 5469 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5470 drop = self._match(TokenType.DROP) and self._parse_drop() 5471 if drop and not isinstance(drop, exp.Command): 5472 drop.set("kind", drop.args.get("kind", "COLUMN")) 5473 return drop 5474 5475 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5476 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5477 return self.expression( 5478 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5479 ) 5480 5481 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5482 index = self._index - 1 5483 5484 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5485 return self._parse_csv( 5486 lambda: self.expression( 5487 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5488 ) 5489 ) 5490 5491 self._retreat(index) 5492 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5493 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5494 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5495 5496 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5497 self._match(TokenType.COLUMN) 5498 column = self._parse_field(any_token=True) 5499 5500 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5501 return self.expression(exp.AlterColumn, this=column, drop=True) 5502 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5503 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5504 if self._match(TokenType.COMMENT): 5505 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5506 5507 self._match_text_seq("SET", "DATA") 5508 return self.expression( 5509 exp.AlterColumn, 5510 this=column, 5511 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5512 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5513 using=self._match(TokenType.USING) and self._parse_conjunction(), 5514 ) 5515 5516 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5517 index = self._index - 1 5518 5519 partition_exists = self._parse_exists() 5520 if self._match(TokenType.PARTITION, advance=False): 5521 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5522 5523 self._retreat(index) 5524 return 
self._parse_csv(self._parse_drop_column) 5525 5526 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5527 if self._match(TokenType.COLUMN): 5528 exists = self._parse_exists() 5529 old_column = self._parse_column() 5530 to = self._match_text_seq("TO") 5531 new_column = self._parse_column() 5532 5533 if old_column is None or to is None or new_column is None: 5534 return None 5535 5536 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5537 5538 self._match_text_seq("TO") 5539 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5540 5541 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5542 start = self._prev 5543 5544 if not self._match(TokenType.TABLE): 5545 return self._parse_as_command(start) 5546 5547 exists = self._parse_exists() 5548 only = self._match_text_seq("ONLY") 5549 this = self._parse_table(schema=True) 5550 5551 if self._next: 5552 self._advance() 5553 5554 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5555 if parser: 5556 actions = ensure_list(parser(self)) 5557 options = self._parse_csv(self._parse_property) 5558 5559 if not self._curr and actions: 5560 return self.expression( 5561 exp.AlterTable, 5562 this=this, 5563 exists=exists, 5564 actions=actions, 5565 only=only, 5566 options=options, 5567 ) 5568 5569 return self._parse_as_command(start) 5570 5571 def _parse_merge(self) -> exp.Merge: 5572 self._match(TokenType.INTO) 5573 target = self._parse_table() 5574 5575 if target and self._match(TokenType.ALIAS, advance=False): 5576 target.set("alias", self._parse_table_alias()) 5577 5578 self._match(TokenType.USING) 5579 using = self._parse_table() 5580 5581 self._match(TokenType.ON) 5582 on = self._parse_conjunction() 5583 5584 return self.expression( 5585 exp.Merge, 5586 this=target, 5587 using=using, 5588 on=on, 5589 expressions=self._parse_when_matched(), 5590 ) 5591 5592 def _parse_when_matched(self) -> t.List[exp.When]: 5593 whens = [] 5594 5595 while self._match(TokenType.WHEN): 5596 matched = not self._match(TokenType.NOT) 5597 self._match_text_seq("MATCHED") 5598 source = ( 5599 False 5600 if self._match_text_seq("BY", "TARGET") 5601 else self._match_text_seq("BY", "SOURCE") 5602 ) 5603 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5604 5605 self._match(TokenType.THEN) 5606 5607 if self._match(TokenType.INSERT): 5608 _this = self._parse_star() 5609 if _this: 5610 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5611 else: 5612 then = self.expression( 5613 exp.Insert, 5614 this=self._parse_value(), 5615 expression=self._match_text_seq("VALUES") and self._parse_value(), 5616 ) 5617 elif self._match(TokenType.UPDATE): 5618 expressions = self._parse_star() 5619 if expressions: 5620 then = self.expression(exp.Update, expressions=expressions) 5621 else: 5622 then = self.expression( 5623 exp.Update, 5624 expressions=self._match(TokenType.SET) 5625 and self._parse_csv(self._parse_equality), 5626 ) 5627 elif self._match(TokenType.DELETE): 5628 then = self.expression(exp.Var, this=self._prev.text) 5629 else: 5630 then = None 5631 5632 whens.append( 5633 self.expression( 5634 exp.When, 5635 matched=matched, 5636 source=source, 5637 condition=condition, 5638 then=then, 5639 ) 5640 ) 5641 return whens 5642 5643 def _parse_show(self) -> t.Optional[exp.Expression]: 5644 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5645 if parser: 5646 return parser(self) 5647 return 
self._parse_as_command(self._prev) 5648 5649 def _parse_set_item_assignment( 5650 self, kind: t.Optional[str] = None 5651 ) -> t.Optional[exp.Expression]: 5652 index = self._index 5653 5654 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5655 return self._parse_set_transaction(global_=kind == "GLOBAL") 5656 5657 left = self._parse_primary() or self._parse_id_var() 5658 assignment_delimiter = self._match_texts(("=", "TO")) 5659 5660 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5661 self._retreat(index) 5662 return None 5663 5664 right = self._parse_statement() or self._parse_id_var() 5665 this = self.expression(exp.EQ, this=left, expression=right) 5666 5667 return self.expression(exp.SetItem, this=this, kind=kind) 5668 5669 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5670 self._match_text_seq("TRANSACTION") 5671 characteristics = self._parse_csv( 5672 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5673 ) 5674 return self.expression( 5675 exp.SetItem, 5676 expressions=characteristics, 5677 kind="TRANSACTION", 5678 **{"global": global_}, # type: ignore 5679 ) 5680 5681 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5682 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5683 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5684 5685 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5686 index = self._index 5687 set_ = self.expression( 5688 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5689 ) 5690 5691 if self._curr: 5692 self._retreat(index) 5693 return self._parse_as_command(self._prev) 5694 5695 return set_ 5696 5697 def _parse_var_from_options( 5698 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5699 ) -> t.Optional[exp.Var]: 5700 start = self._curr 5701 if not start: 5702 return None 5703 5704 option = start.text.upper() 5705 continuations = options.get(option) 5706 5707 index = self._index 5708 self._advance() 5709 for keywords in continuations or []: 5710 if isinstance(keywords, str): 5711 keywords = (keywords,) 5712 5713 if self._match_text_seq(*keywords): 5714 option = f"{option} {' '.join(keywords)}" 5715 break 5716 else: 5717 if continuations or continuations is None: 5718 if raise_unmatched: 5719 self.raise_error(f"Unknown option {option}") 5720 5721 self._retreat(index) 5722 return None 5723 5724 return exp.var(option) 5725 5726 def _parse_as_command(self, start: Token) -> exp.Command: 5727 while self._curr: 5728 self._advance() 5729 text = self._find_sql(start, self._prev) 5730 size = len(start.text) 5731 self._warn_unsupported() 5732 return exp.Command(this=text[:size], expression=text[size:]) 5733 5734 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5735 settings = [] 5736 5737 self._match_l_paren() 5738 kind = self._parse_id_var() 5739 5740 if self._match(TokenType.L_PAREN): 5741 while True: 5742 key = self._parse_id_var() 5743 value = self._parse_primary() 5744 5745 if not key and value is None: 5746 break 5747 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5748 self._match(TokenType.R_PAREN) 5749 5750 self._match_r_paren() 5751 5752 return self.expression( 5753 exp.DictProperty, 5754 this=this, 5755 kind=kind.this if kind else None, 5756 settings=settings, 5757 ) 5758 5759 def _parse_dict_range(self, this: str) -> exp.DictRange: 5760 self._match_l_paren() 5761 has_min = 
self._match_text_seq("MIN") 5762 if has_min: 5763 min = self._parse_var() or self._parse_primary() 5764 self._match_text_seq("MAX") 5765 max = self._parse_var() or self._parse_primary() 5766 else: 5767 max = self._parse_var() or self._parse_primary() 5768 min = exp.Literal.number(0) 5769 self._match_r_paren() 5770 return self.expression(exp.DictRange, this=this, min=min, max=max) 5771 5772 def _parse_comprehension( 5773 self, this: t.Optional[exp.Expression] 5774 ) -> t.Optional[exp.Comprehension]: 5775 index = self._index 5776 expression = self._parse_column() 5777 if not self._match(TokenType.IN): 5778 self._retreat(index - 1) 5779 return None 5780 iterator = self._parse_column() 5781 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5782 return self.expression( 5783 exp.Comprehension, 5784 this=this, 5785 expression=expression, 5786 iterator=iterator, 5787 condition=condition, 5788 ) 5789 5790 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5791 if self._match(TokenType.HEREDOC_STRING): 5792 return self.expression(exp.Heredoc, this=self._prev.text) 5793 5794 if not self._match_text_seq("$"): 5795 return None 5796 5797 tags = ["$"] 5798 tag_text = None 5799 5800 if self._is_connected(): 5801 self._advance() 5802 tags.append(self._prev.text.upper()) 5803 else: 5804 self.raise_error("No closing $ found") 5805 5806 if tags[-1] != "$": 5807 if self._is_connected() and self._match_text_seq("$"): 5808 tag_text = tags[-1] 5809 tags.append("$") 5810 else: 5811 self.raise_error("No closing $ found") 5812 5813 heredoc_start = self._curr 5814 5815 while self._curr: 5816 if self._match_text_seq(*tags, advance=False): 5817 this = self._find_sql(heredoc_start, self._prev) 5818 self._advance(len(tags)) 5819 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5820 5821 self._advance() 5822 5823 self.raise_error(f"No closing {''.join(tags)} found") 5824 return None 5825 5826 def _find_parser( 5827 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5828 ) -> t.Optional[t.Callable]: 5829 if not self._curr: 5830 return None 5831 5832 index = self._index 5833 this = [] 5834 while True: 5835 # The current token might be multiple words 5836 curr = self._curr.text.upper() 5837 key = curr.split(" ") 5838 this.append(curr) 5839 5840 self._advance() 5841 result, trie = in_trie(trie, key) 5842 if result == TrieResult.FAILED: 5843 break 5844 5845 if result == TrieResult.EXISTS: 5846 subparser = parsers[" ".join(this)] 5847 return subparser 5848 5849 self._retreat(index) 5850 return None 5851 5852 def _match(self, token_type, advance=True, expression=None): 5853 if not self._curr: 5854 return None 5855 5856 if self._curr.token_type == token_type: 5857 if advance: 5858 self._advance() 5859 self._add_comments(expression) 5860 return True 5861 5862 return None 5863 5864 def _match_set(self, types, advance=True): 5865 if not self._curr: 5866 return None 5867 5868 if self._curr.token_type in types: 5869 if advance: 5870 self._advance() 5871 return True 5872 5873 return None 5874 5875 def _match_pair(self, token_type_a, token_type_b, advance=True): 5876 if not self._curr or not self._next: 5877 return None 5878 5879 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5880 if advance: 5881 self._advance(2) 5882 return True 5883 5884 return None 5885 5886 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5887 if not self._match(TokenType.L_PAREN, expression=expression): 5888 self.raise_error("Expecting (") 5889 5890 def 
_match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5891 if not self._match(TokenType.R_PAREN, expression=expression): 5892 self.raise_error("Expecting )") 5893 5894 def _match_texts(self, texts, advance=True): 5895 if self._curr and self._curr.text.upper() in texts: 5896 if advance: 5897 self._advance() 5898 return True 5899 return None 5900 5901 def _match_text_seq(self, *texts, advance=True): 5902 index = self._index 5903 for text in texts: 5904 if self._curr and self._curr.text.upper() == text: 5905 self._advance() 5906 else: 5907 self._retreat(index) 5908 return None 5909 5910 if not advance: 5911 self._retreat(index) 5912 5913 return True 5914 5915 @t.overload 5916 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: ... 5917 5918 @t.overload 5919 def _replace_columns_with_dots( 5920 self, this: t.Optional[exp.Expression] 5921 ) -> t.Optional[exp.Expression]: ... 5922 5923 def _replace_columns_with_dots(self, this): 5924 if isinstance(this, exp.Dot): 5925 exp.replace_children(this, self._replace_columns_with_dots) 5926 elif isinstance(this, exp.Column): 5927 exp.replace_children(this, self._replace_columns_with_dots) 5928 table = this.args.get("table") 5929 this = ( 5930 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5931 ) 5932 5933 return this 5934 5935 def _replace_lambda( 5936 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5937 ) -> t.Optional[exp.Expression]: 5938 if not node: 5939 return node 5940 5941 for column in node.find_all(exp.Column): 5942 if column.parts[0].name in lambda_variables: 5943 dot_or_id = column.to_dot() if column.table else column.this 5944 parent = column.parent 5945 5946 while isinstance(parent, exp.Dot): 5947 if not isinstance(parent.parent, exp.Dot): 5948 parent.replace(dot_or_id) 5949 break 5950 parent = parent.parent 5951 else: 5952 if column is node: 5953 node = dot_or_id 5954 else: 5955 column.replace(dot_or_id) 5956 return node 5957 5958 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 5959 start = self._prev 5960 5961 # Not to be confused with TRUNCATE(number, decimals) function call 5962 if self._match(TokenType.L_PAREN): 5963 self._retreat(self._index - 2) 5964 return self._parse_function() 5965 5966 # Clickhouse supports TRUNCATE DATABASE as well 5967 is_database = self._match(TokenType.DATABASE) 5968 5969 self._match(TokenType.TABLE) 5970 5971 exists = self._parse_exists(not_=False) 5972 5973 expressions = self._parse_csv( 5974 lambda: self._parse_table(schema=True, is_db_reference=is_database) 5975 ) 5976 5977 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 5978 5979 if self._match_text_seq("RESTART", "IDENTITY"): 5980 identity = "RESTART" 5981 elif self._match_text_seq("CONTINUE", "IDENTITY"): 5982 identity = "CONTINUE" 5983 else: 5984 identity = None 5985 5986 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 5987 option = self._prev.text 5988 else: 5989 option = None 5990 5991 partition = self._parse_partition() 5992 5993 # Fallback case 5994 if self._curr: 5995 return self._parse_as_command(start) 5996 5997 return self.expression( 5998 exp.TruncateTable, 5999 expressions=expressions, 6000 is_database=is_database, 6001 exists=exists, 6002 cluster=cluster, 6003 identity=identity, 6004 option=option, 6005 partition=partition, 6006 )
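The `_parse_decode` docstring in the source above explains how the multi-argument DECODE form is lowered to a CASE expression, with NULL searches checked via IS NULL. A minimal sketch of the observable behavior through the public sqlglot entry points (the commented output is what the default dialect should produce, shown as an illustration rather than a guarantee):

    import sqlglot

    # Search/result pairs become CASE WHEN branches; a literal NULL search is
    # rewritten to an IS NULL test, and a trailing odd argument becomes ELSE.
    sql = "SELECT DECODE(x, 1, 'one', NULL, 'none', 'other') FROM t"
    print(sqlglot.transpile(sql)[0])
    # SELECT CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END FROM t

Likewise, the comments in `_parse_window` above state that IGNORE/RESPECT NULLS is accepted both inside the function call and after it; a quick sketch checking that both placements normalize to the same tree:

    import sqlglot

    a = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
    b = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
    assert a == b  # both parse to IgnoreNulls wrapping the aggregate inside the window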
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1048 def __init__( 1049 self, 1050 error_level: t.Optional[ErrorLevel] = None, 1051 error_message_context: int = 100, 1052 max_errors: int = 3, 1053 dialect: DialectType = None, 1054 ): 1055 from sqlglot.dialects import Dialect 1056 1057 self.error_level = error_level or ErrorLevel.IMMEDIATE 1058 self.error_message_context = error_message_context 1059 self.max_errors = max_errors 1060 self.dialect = Dialect.get_or_raise(dialect) 1061 self.reset()
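A minimal sketch of constructing a Parser directly; in everyday use the top-level helpers sqlglot.parse / sqlglot.parse_one build the tokenizer and parser for you, so manual construction is mostly useful when tuning error handling:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Collect up to 5 errors and raise them together, instead of failing
    # immediately on the first one (the IMMEDIATE default).
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)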
1073 def parse( 1074 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1075 ) -> t.List[t.Optional[exp.Expression]]: 1076 """ 1077 Parses a list of tokens and returns a list of syntax trees, one tree 1078 per parsed SQL statement. 1079 1080 Args: 1081 raw_tokens: The list of tokens. 1082 sql: The original SQL string, used to produce helpful debug messages. 1083 1084 Returns: 1085 The list of the produced syntax trees. 1086 """ 1087 return self._parse( 1088 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1089 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
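A short sketch of the Tokenizer-to-Parser hand-off this method sits in; sqlglot.parse wraps essentially this sequence:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; SELECT b FROM u"
    tokens = Tokenizer().tokenize(sql)
    trees = Parser().parse(tokens, sql)  # one syntax tree per statement
    assert len(trees) == 2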
1091 def parse_into( 1092 self, 1093 expression_types: exp.IntoType, 1094 raw_tokens: t.List[Token], 1095 sql: t.Optional[str] = None, 1096 ) -> t.List[t.Optional[exp.Expression]]: 1097 """ 1098 Parses a list of tokens into a given Expression type. If a collection of Expression 1099 types is given instead, this method will try to parse the token list into each one 1100 of them, stopping at the first for which the parsing succeeds. 1101 1102 Args: 1103 expression_types: The expression type(s) to try and parse the token list into. 1104 raw_tokens: The list of tokens. 1105 sql: The original SQL string, used to produce helpful debug messages. 1106 1107 Returns: 1108 The target Expression. 1109 """ 1110 errors = [] 1111 for expression_type in ensure_list(expression_types): 1112 parser = self.EXPRESSION_PARSERS.get(expression_type) 1113 if not parser: 1114 raise TypeError(f"No parser registered for {expression_type}") 1115 1116 try: 1117 return self._parse(parser, raw_tokens, sql) 1118 except ParseError as e: 1119 e.errors[0]["into_expression"] = expression_type 1120 errors.append(e) 1121 1122 raise ParseError( 1123 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1124 errors=merge_errors(errors), 1125 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
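A sketch of coercing a token stream into a specific node type, assuming exp.Select has a registered entry in EXPRESSION_PARSERS (it does for the base Parser). Per the source above, a failed attempt is recorded on the raised ParseError under the into_expression key of each error:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1"
    tree = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)[0]
    assert isinstance(tree, exp.Select)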
1162 def check_errors(self) -> None: 1163 """Logs or raises any found errors, depending on the chosen error level setting.""" 1164 if self.error_level == ErrorLevel.WARN: 1165 for error in self.errors: 1166 logger.error(str(error)) 1167 elif self.error_level == ErrorLevel.RAISE and self.errors: 1168 raise ParseError( 1169 concat_messages(self.errors, self.max_errors), 1170 errors=merge_errors(self.errors), 1171 )
Logs or raises any errors found, depending on the chosen error level setting.
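A sketch of deferred error handling, assuming the malformed query below actually trips a parse error: with ErrorLevel.WARN, recorded errors are logged through the "sqlglot" logger instead of being raised, and a best-effort result is still returned:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT * FROM (SELECT 1"  # unbalanced parenthesis
    parser = Parser(error_level=ErrorLevel.WARN)
    trees = parser.parse(Tokenizer().tokenize(sql), sql)
    parser.check_errors()  # logs the recorded errors; does not raise at WARN level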
1173 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1174 """ 1175 Appends an error in the list of recorded errors or raises it, depending on the chosen 1176 error level setting. 1177 """ 1178 token = token or self._curr or self._prev or Token.string("") 1179 start = token.start 1180 end = token.end + 1 1181 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1182 highlight = self.sql[start:end] 1183 end_context = self.sql[end : end + self.error_message_context] 1184 1185 error = ParseError.new( 1186 f"{message}. Line {token.line}, Col: {token.col}.\n" 1187 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1188 description=message, 1189 line=token.line, 1190 col=token.col, 1191 start_context=start_context, 1192 highlight=highlight, 1193 end_context=end_context, 1194 ) 1195 1196 if self.error_level == ErrorLevel.IMMEDIATE: 1197 raise error 1198 1199 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
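The "Expected AS after CAST" branch of _parse_cast in the source above funnels into this method; a sketch of what surfaces with the default IMMEDIATE error level:

    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT CAST(x INT)"  # missing AS
    try:
        Parser().parse(Tokenizer().tokenize(sql), sql)
    except ParseError as e:
        # Each error dict carries the description, position, and the
        # highlighted context assembled by raise_error.
        print(e.errors[0]["description"])  # e.g. Expected AS after CAST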
1201 def expression( 1202 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1203 ) -> E: 1204 """ 1205 Creates a new, validated Expression. 1206 1207 Args: 1208 exp_class: The expression class to instantiate. 1209 comments: An optional list of comments to attach to the expression. 1210 kwargs: The arguments to set for the expression along with their respective values. 1211 1212 Returns: 1213 The target expression. 1214 """ 1215 instance = exp_class(**kwargs) 1216 instance.add_comments(comments) if comments else self._add_comments(instance) 1217 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
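A sketch of how the _parse_* methods above use this helper to assemble validated nodes:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    node = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
    print(node.sql())  # a = 1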
1224 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1225 """ 1226 Validates an Expression, making sure that all its mandatory arguments are set. 1227 1228 Args: 1229 expression: The expression to validate. 1230 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1231 1232 Returns: 1233 The validated expression. 1234 """ 1235 if self.error_level != ErrorLevel.IGNORE: 1236 for error_message in expression.error_messages(args): 1237 self.raise_error(error_message) 1238 1239 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
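A sketch of validation failing on a node that is missing a mandatory argument; with the default IMMEDIATE error level the error is raised on the spot rather than recorded:

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    incomplete = exp.EQ(this=exp.column("x"))  # required "expression" arg not set
    try:
        Parser().validate_expression(incomplete)
    except ParseError as e:
        print(e.errors[0]["description"])  # names the missing keyword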