# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18logger = logging.getLogger("sqlglot") 19 20 21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 ) 35 36 37def parse_like(args: t.List) -> exp.Escape | exp.Like: 38 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 39 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 40 41 42def binary_range_parser( 43 expr_type: t.Type[exp.Expression], 44) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 45 return lambda self, this: self._parse_escape( 46 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 47 ) 48 49 50def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 51 # Default argument order is base, expression 52 this = seq_get(args, 0) 53 expression = seq_get(args, 1) 54 55 if expression: 56 if not dialect.LOG_BASE_FIRST: 57 this, expression = expression, this 58 return exp.Log(this=this, expression=expression) 59 60 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 61 62 63class _Parser(type): 64 def __new__(cls, clsname, bases, attrs): 65 klass = 
super().__new__(cls, clsname, bases, attrs) 66 67 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 68 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 69 70 return klass 71 72 73class Parser(metaclass=_Parser): 74 """ 75 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 76 77 Args: 78 error_level: The desired error level. 79 Default: ErrorLevel.IMMEDIATE 80 error_message_context: Determines the amount of context to capture from a 81 query string when displaying the error message (in number of characters). 82 Default: 100 83 max_errors: Maximum number of error messages to include in a raised ParseError. 84 This is only relevant if error_level is ErrorLevel.RAISE. 85 Default: 3 86 """ 87 88 FUNCTIONS: t.Dict[str, t.Callable] = { 89 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 90 "CONCAT": lambda args, dialect: exp.Concat( 91 expressions=args, 92 safe=not dialect.STRICT_STRING_CONCAT, 93 coalesce=dialect.CONCAT_COALESCE, 94 ), 95 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 96 expressions=args, 97 safe=not dialect.STRICT_STRING_CONCAT, 98 coalesce=dialect.CONCAT_COALESCE, 99 ), 100 "DATE_TO_DATE_STR": lambda args: exp.Cast( 101 this=seq_get(args, 0), 102 to=exp.DataType(this=exp.DataType.Type.TEXT), 103 ), 104 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 105 "LIKE": parse_like, 106 "LOG": parse_logarithm, 107 "TIME_TO_TIME_STR": lambda args: exp.Cast( 108 this=seq_get(args, 0), 109 to=exp.DataType(this=exp.DataType.Type.TEXT), 110 ), 111 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 112 this=exp.Cast( 113 this=seq_get(args, 0), 114 to=exp.DataType(this=exp.DataType.Type.TEXT), 115 ), 116 start=exp.Literal.number(1), 117 length=exp.Literal.number(10), 118 ), 119 "VAR_MAP": parse_var_map, 120 } 121 122 NO_PAREN_FUNCTIONS = { 123 TokenType.CURRENT_DATE: exp.CurrentDate, 124 TokenType.CURRENT_DATETIME: 
exp.CurrentDate, 125 TokenType.CURRENT_TIME: exp.CurrentTime, 126 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 127 TokenType.CURRENT_USER: exp.CurrentUser, 128 } 129 130 STRUCT_TYPE_TOKENS = { 131 TokenType.NESTED, 132 TokenType.STRUCT, 133 } 134 135 NESTED_TYPE_TOKENS = { 136 TokenType.ARRAY, 137 TokenType.LOWCARDINALITY, 138 TokenType.MAP, 139 TokenType.NULLABLE, 140 *STRUCT_TYPE_TOKENS, 141 } 142 143 ENUM_TYPE_TOKENS = { 144 TokenType.ENUM, 145 TokenType.ENUM8, 146 TokenType.ENUM16, 147 } 148 149 TYPE_TOKENS = { 150 TokenType.BIT, 151 TokenType.BOOLEAN, 152 TokenType.TINYINT, 153 TokenType.UTINYINT, 154 TokenType.SMALLINT, 155 TokenType.USMALLINT, 156 TokenType.INT, 157 TokenType.UINT, 158 TokenType.BIGINT, 159 TokenType.UBIGINT, 160 TokenType.INT128, 161 TokenType.UINT128, 162 TokenType.INT256, 163 TokenType.UINT256, 164 TokenType.MEDIUMINT, 165 TokenType.UMEDIUMINT, 166 TokenType.FIXEDSTRING, 167 TokenType.FLOAT, 168 TokenType.DOUBLE, 169 TokenType.CHAR, 170 TokenType.NCHAR, 171 TokenType.VARCHAR, 172 TokenType.NVARCHAR, 173 TokenType.TEXT, 174 TokenType.MEDIUMTEXT, 175 TokenType.LONGTEXT, 176 TokenType.MEDIUMBLOB, 177 TokenType.LONGBLOB, 178 TokenType.BINARY, 179 TokenType.VARBINARY, 180 TokenType.JSON, 181 TokenType.JSONB, 182 TokenType.INTERVAL, 183 TokenType.TINYBLOB, 184 TokenType.TINYTEXT, 185 TokenType.TIME, 186 TokenType.TIMETZ, 187 TokenType.TIMESTAMP, 188 TokenType.TIMESTAMP_S, 189 TokenType.TIMESTAMP_MS, 190 TokenType.TIMESTAMP_NS, 191 TokenType.TIMESTAMPTZ, 192 TokenType.TIMESTAMPLTZ, 193 TokenType.DATETIME, 194 TokenType.DATETIME64, 195 TokenType.DATE, 196 TokenType.INT4RANGE, 197 TokenType.INT4MULTIRANGE, 198 TokenType.INT8RANGE, 199 TokenType.INT8MULTIRANGE, 200 TokenType.NUMRANGE, 201 TokenType.NUMMULTIRANGE, 202 TokenType.TSRANGE, 203 TokenType.TSMULTIRANGE, 204 TokenType.TSTZRANGE, 205 TokenType.TSTZMULTIRANGE, 206 TokenType.DATERANGE, 207 TokenType.DATEMULTIRANGE, 208 TokenType.DECIMAL, 209 TokenType.UDECIMAL, 210 TokenType.BIGDECIMAL, 
211 TokenType.UUID, 212 TokenType.GEOGRAPHY, 213 TokenType.GEOMETRY, 214 TokenType.HLLSKETCH, 215 TokenType.HSTORE, 216 TokenType.PSEUDO_TYPE, 217 TokenType.SUPER, 218 TokenType.SERIAL, 219 TokenType.SMALLSERIAL, 220 TokenType.BIGSERIAL, 221 TokenType.XML, 222 TokenType.YEAR, 223 TokenType.UNIQUEIDENTIFIER, 224 TokenType.USERDEFINED, 225 TokenType.MONEY, 226 TokenType.SMALLMONEY, 227 TokenType.ROWVERSION, 228 TokenType.IMAGE, 229 TokenType.VARIANT, 230 TokenType.OBJECT, 231 TokenType.OBJECT_IDENTIFIER, 232 TokenType.INET, 233 TokenType.IPADDRESS, 234 TokenType.IPPREFIX, 235 TokenType.UNKNOWN, 236 TokenType.NULL, 237 *ENUM_TYPE_TOKENS, 238 *NESTED_TYPE_TOKENS, 239 } 240 241 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 242 TokenType.BIGINT: TokenType.UBIGINT, 243 TokenType.INT: TokenType.UINT, 244 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 245 TokenType.SMALLINT: TokenType.USMALLINT, 246 TokenType.TINYINT: TokenType.UTINYINT, 247 TokenType.DECIMAL: TokenType.UDECIMAL, 248 } 249 250 SUBQUERY_PREDICATES = { 251 TokenType.ANY: exp.Any, 252 TokenType.ALL: exp.All, 253 TokenType.EXISTS: exp.Exists, 254 TokenType.SOME: exp.Any, 255 } 256 257 RESERVED_TOKENS = { 258 *Tokenizer.SINGLE_TOKENS.values(), 259 TokenType.SELECT, 260 } 261 262 DB_CREATABLES = { 263 TokenType.DATABASE, 264 TokenType.SCHEMA, 265 TokenType.TABLE, 266 TokenType.VIEW, 267 TokenType.MODEL, 268 TokenType.DICTIONARY, 269 } 270 271 CREATABLES = { 272 TokenType.COLUMN, 273 TokenType.CONSTRAINT, 274 TokenType.FUNCTION, 275 TokenType.INDEX, 276 TokenType.PROCEDURE, 277 TokenType.FOREIGN_KEY, 278 *DB_CREATABLES, 279 } 280 281 # Tokens that can represent identifiers 282 ID_VAR_TOKENS = { 283 TokenType.VAR, 284 TokenType.ANTI, 285 TokenType.APPLY, 286 TokenType.ASC, 287 TokenType.AUTO_INCREMENT, 288 TokenType.BEGIN, 289 TokenType.CACHE, 290 TokenType.CASE, 291 TokenType.COLLATE, 292 TokenType.COMMAND, 293 TokenType.COMMENT, 294 TokenType.COMMIT, 295 TokenType.CONSTRAINT, 296 TokenType.DEFAULT, 297 TokenType.DELETE, 298 
TokenType.DESC, 299 TokenType.DESCRIBE, 300 TokenType.DICTIONARY, 301 TokenType.DIV, 302 TokenType.END, 303 TokenType.EXECUTE, 304 TokenType.ESCAPE, 305 TokenType.FALSE, 306 TokenType.FIRST, 307 TokenType.FILTER, 308 TokenType.FINAL, 309 TokenType.FORMAT, 310 TokenType.FULL, 311 TokenType.IS, 312 TokenType.ISNULL, 313 TokenType.INTERVAL, 314 TokenType.KEEP, 315 TokenType.KILL, 316 TokenType.LEFT, 317 TokenType.LOAD, 318 TokenType.MERGE, 319 TokenType.NATURAL, 320 TokenType.NEXT, 321 TokenType.OFFSET, 322 TokenType.OPERATOR, 323 TokenType.ORDINALITY, 324 TokenType.OVERLAPS, 325 TokenType.OVERWRITE, 326 TokenType.PARTITION, 327 TokenType.PERCENT, 328 TokenType.PIVOT, 329 TokenType.PRAGMA, 330 TokenType.RANGE, 331 TokenType.RECURSIVE, 332 TokenType.REFERENCES, 333 TokenType.REFRESH, 334 TokenType.REPLACE, 335 TokenType.RIGHT, 336 TokenType.ROW, 337 TokenType.ROWS, 338 TokenType.SEMI, 339 TokenType.SET, 340 TokenType.SETTINGS, 341 TokenType.SHOW, 342 TokenType.TEMPORARY, 343 TokenType.TOP, 344 TokenType.TRUE, 345 TokenType.UNIQUE, 346 TokenType.UNPIVOT, 347 TokenType.UPDATE, 348 TokenType.USE, 349 TokenType.VOLATILE, 350 TokenType.WINDOW, 351 *CREATABLES, 352 *SUBQUERY_PREDICATES, 353 *TYPE_TOKENS, 354 *NO_PAREN_FUNCTIONS, 355 } 356 357 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 358 359 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 360 TokenType.ANTI, 361 TokenType.APPLY, 362 TokenType.ASOF, 363 TokenType.FULL, 364 TokenType.LEFT, 365 TokenType.LOCK, 366 TokenType.NATURAL, 367 TokenType.OFFSET, 368 TokenType.RIGHT, 369 TokenType.SEMI, 370 TokenType.WINDOW, 371 } 372 373 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 374 375 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 376 377 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 378 379 FUNC_TOKENS = { 380 TokenType.COLLATE, 381 TokenType.COMMAND, 382 TokenType.CURRENT_DATE, 383 TokenType.CURRENT_DATETIME, 384 TokenType.CURRENT_TIMESTAMP, 385 TokenType.CURRENT_TIME, 386 TokenType.CURRENT_USER, 387 
TokenType.FILTER, 388 TokenType.FIRST, 389 TokenType.FORMAT, 390 TokenType.GLOB, 391 TokenType.IDENTIFIER, 392 TokenType.INDEX, 393 TokenType.ISNULL, 394 TokenType.ILIKE, 395 TokenType.INSERT, 396 TokenType.LIKE, 397 TokenType.MERGE, 398 TokenType.OFFSET, 399 TokenType.PRIMARY_KEY, 400 TokenType.RANGE, 401 TokenType.REPLACE, 402 TokenType.RLIKE, 403 TokenType.ROW, 404 TokenType.UNNEST, 405 TokenType.VAR, 406 TokenType.LEFT, 407 TokenType.RIGHT, 408 TokenType.DATE, 409 TokenType.DATETIME, 410 TokenType.TABLE, 411 TokenType.TIMESTAMP, 412 TokenType.TIMESTAMPTZ, 413 TokenType.WINDOW, 414 TokenType.XOR, 415 *TYPE_TOKENS, 416 *SUBQUERY_PREDICATES, 417 } 418 419 CONJUNCTION = { 420 TokenType.AND: exp.And, 421 TokenType.OR: exp.Or, 422 } 423 424 EQUALITY = { 425 TokenType.COLON_EQ: exp.PropertyEQ, 426 TokenType.EQ: exp.EQ, 427 TokenType.NEQ: exp.NEQ, 428 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 429 } 430 431 COMPARISON = { 432 TokenType.GT: exp.GT, 433 TokenType.GTE: exp.GTE, 434 TokenType.LT: exp.LT, 435 TokenType.LTE: exp.LTE, 436 } 437 438 BITWISE = { 439 TokenType.AMP: exp.BitwiseAnd, 440 TokenType.CARET: exp.BitwiseXor, 441 TokenType.PIPE: exp.BitwiseOr, 442 } 443 444 TERM = { 445 TokenType.DASH: exp.Sub, 446 TokenType.PLUS: exp.Add, 447 TokenType.MOD: exp.Mod, 448 TokenType.COLLATE: exp.Collate, 449 } 450 451 FACTOR = { 452 TokenType.DIV: exp.IntDiv, 453 TokenType.LR_ARROW: exp.Distance, 454 TokenType.SLASH: exp.Div, 455 TokenType.STAR: exp.Mul, 456 } 457 458 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 459 460 TIMES = { 461 TokenType.TIME, 462 TokenType.TIMETZ, 463 } 464 465 TIMESTAMPS = { 466 TokenType.TIMESTAMP, 467 TokenType.TIMESTAMPTZ, 468 TokenType.TIMESTAMPLTZ, 469 *TIMES, 470 } 471 472 SET_OPERATIONS = { 473 TokenType.UNION, 474 TokenType.INTERSECT, 475 TokenType.EXCEPT, 476 } 477 478 JOIN_METHODS = { 479 TokenType.NATURAL, 480 TokenType.ASOF, 481 } 482 483 JOIN_SIDES = { 484 TokenType.LEFT, 485 TokenType.RIGHT, 486 TokenType.FULL, 487 } 488 489 
JOIN_KINDS = { 490 TokenType.INNER, 491 TokenType.OUTER, 492 TokenType.CROSS, 493 TokenType.SEMI, 494 TokenType.ANTI, 495 } 496 497 JOIN_HINTS: t.Set[str] = set() 498 499 LAMBDAS = { 500 TokenType.ARROW: lambda self, expressions: self.expression( 501 exp.Lambda, 502 this=self._replace_lambda( 503 self._parse_conjunction(), 504 {node.name for node in expressions}, 505 ), 506 expressions=expressions, 507 ), 508 TokenType.FARROW: lambda self, expressions: self.expression( 509 exp.Kwarg, 510 this=exp.var(expressions[0].name), 511 expression=self._parse_conjunction(), 512 ), 513 } 514 515 COLUMN_OPERATORS = { 516 TokenType.DOT: None, 517 TokenType.DCOLON: lambda self, this, to: self.expression( 518 exp.Cast if self.STRICT_CAST else exp.TryCast, 519 this=this, 520 to=to, 521 ), 522 TokenType.ARROW: lambda self, this, path: self.expression( 523 exp.JSONExtract, 524 this=this, 525 expression=path, 526 ), 527 TokenType.DARROW: lambda self, this, path: self.expression( 528 exp.JSONExtractScalar, 529 this=this, 530 expression=path, 531 ), 532 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 533 exp.JSONBExtract, 534 this=this, 535 expression=path, 536 ), 537 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 538 exp.JSONBExtractScalar, 539 this=this, 540 expression=path, 541 ), 542 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 543 exp.JSONBContains, 544 this=this, 545 expression=key, 546 ), 547 } 548 549 EXPRESSION_PARSERS = { 550 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 551 exp.Column: lambda self: self._parse_column(), 552 exp.Condition: lambda self: self._parse_conjunction(), 553 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 554 exp.Expression: lambda self: self._parse_statement(), 555 exp.From: lambda self: self._parse_from(), 556 exp.Group: lambda self: self._parse_group(), 557 exp.Having: lambda self: self._parse_having(), 558 exp.Identifier: lambda self: 
self._parse_id_var(), 559 exp.Join: lambda self: self._parse_join(), 560 exp.Lambda: lambda self: self._parse_lambda(), 561 exp.Lateral: lambda self: self._parse_lateral(), 562 exp.Limit: lambda self: self._parse_limit(), 563 exp.Offset: lambda self: self._parse_offset(), 564 exp.Order: lambda self: self._parse_order(), 565 exp.Ordered: lambda self: self._parse_ordered(), 566 exp.Properties: lambda self: self._parse_properties(), 567 exp.Qualify: lambda self: self._parse_qualify(), 568 exp.Returning: lambda self: self._parse_returning(), 569 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 570 exp.Table: lambda self: self._parse_table_parts(), 571 exp.TableAlias: lambda self: self._parse_table_alias(), 572 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 573 exp.Where: lambda self: self._parse_where(), 574 exp.Window: lambda self: self._parse_named_window(), 575 exp.With: lambda self: self._parse_with(), 576 "JOIN_TYPE": lambda self: self._parse_join_parts(), 577 } 578 579 STATEMENT_PARSERS = { 580 TokenType.ALTER: lambda self: self._parse_alter(), 581 TokenType.BEGIN: lambda self: self._parse_transaction(), 582 TokenType.CACHE: lambda self: self._parse_cache(), 583 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 584 TokenType.COMMENT: lambda self: self._parse_comment(), 585 TokenType.CREATE: lambda self: self._parse_create(), 586 TokenType.DELETE: lambda self: self._parse_delete(), 587 TokenType.DESC: lambda self: self._parse_describe(), 588 TokenType.DESCRIBE: lambda self: self._parse_describe(), 589 TokenType.DROP: lambda self: self._parse_drop(), 590 TokenType.INSERT: lambda self: self._parse_insert(), 591 TokenType.KILL: lambda self: self._parse_kill(), 592 TokenType.LOAD: lambda self: self._parse_load(), 593 TokenType.MERGE: lambda self: self._parse_merge(), 594 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 595 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, 
this=self._parse_expression()), 596 TokenType.REFRESH: lambda self: self._parse_refresh(), 597 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 598 TokenType.SET: lambda self: self._parse_set(), 599 TokenType.UNCACHE: lambda self: self._parse_uncache(), 600 TokenType.UPDATE: lambda self: self._parse_update(), 601 TokenType.USE: lambda self: self.expression( 602 exp.Use, 603 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 604 and exp.var(self._prev.text), 605 this=self._parse_table(schema=False), 606 ), 607 } 608 609 UNARY_PARSERS = { 610 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 611 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 612 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 613 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 614 } 615 616 PRIMARY_PARSERS = { 617 TokenType.STRING: lambda self, token: self.expression( 618 exp.Literal, this=token.text, is_string=True 619 ), 620 TokenType.NUMBER: lambda self, token: self.expression( 621 exp.Literal, this=token.text, is_string=False 622 ), 623 TokenType.STAR: lambda self, _: self.expression( 624 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 625 ), 626 TokenType.NULL: lambda self, _: self.expression(exp.Null), 627 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 628 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 629 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 630 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 631 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 632 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 633 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 634 
exp.National, this=token.text 635 ), 636 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 637 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 638 exp.RawString, this=token.text 639 ), 640 TokenType.UNICODE_STRING: lambda self, token: self.expression( 641 exp.UnicodeString, 642 this=token.text, 643 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 644 ), 645 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 646 } 647 648 PLACEHOLDER_PARSERS = { 649 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 650 TokenType.PARAMETER: lambda self: self._parse_parameter(), 651 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 652 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 653 else None, 654 } 655 656 RANGE_PARSERS = { 657 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 658 TokenType.GLOB: binary_range_parser(exp.Glob), 659 TokenType.ILIKE: binary_range_parser(exp.ILike), 660 TokenType.IN: lambda self, this: self._parse_in(this), 661 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 662 TokenType.IS: lambda self, this: self._parse_is(this), 663 TokenType.LIKE: binary_range_parser(exp.Like), 664 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 665 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 666 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 667 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 668 } 669 670 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 671 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 672 "AUTO": lambda self: self._parse_auto_property(), 673 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 674 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 675 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 676 
"CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 677 "CHECKSUM": lambda self: self._parse_checksum(), 678 "CLUSTER BY": lambda self: self._parse_cluster(), 679 "CLUSTERED": lambda self: self._parse_clustered_by(), 680 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 681 exp.CollateProperty, **kwargs 682 ), 683 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 684 "COPY": lambda self: self._parse_copy_property(), 685 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 686 "DEFINER": lambda self: self._parse_definer(), 687 "DETERMINISTIC": lambda self: self.expression( 688 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 689 ), 690 "DISTKEY": lambda self: self._parse_distkey(), 691 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 692 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 693 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 694 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 695 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 696 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 697 "FREESPACE": lambda self: self._parse_freespace(), 698 "HEAP": lambda self: self.expression(exp.HeapProperty), 699 "IMMUTABLE": lambda self: self.expression( 700 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 701 ), 702 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 703 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 704 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 705 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 706 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 707 "LIKE": lambda self: self._parse_create_like(), 708 "LOCATION": lambda self: 
self._parse_property_assignment(exp.LocationProperty), 709 "LOCK": lambda self: self._parse_locking(), 710 "LOCKING": lambda self: self._parse_locking(), 711 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 712 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 713 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 714 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 715 "NO": lambda self: self._parse_no_property(), 716 "ON": lambda self: self._parse_on_property(), 717 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 718 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 719 "PARTITION": lambda self: self._parse_partitioned_of(), 720 "PARTITION BY": lambda self: self._parse_partitioned_by(), 721 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 722 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 723 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 724 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 725 "REMOTE": lambda self: self._parse_remote_with_connection(), 726 "RETURNS": lambda self: self._parse_returns(), 727 "ROW": lambda self: self._parse_row(), 728 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 729 "SAMPLE": lambda self: self.expression( 730 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 731 ), 732 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 733 "SETTINGS": lambda self: self.expression( 734 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 735 ), 736 "SORTKEY": lambda self: self._parse_sortkey(), 737 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 738 "STABLE": lambda self: self.expression( 739 exp.StabilityProperty, this=exp.Literal.string("STABLE") 740 ), 741 "STORED": lambda self: self._parse_stored(), 742 "SYSTEM_VERSIONING": lambda 
self: self._parse_system_versioning_property(), 743 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 744 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 745 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 746 "TO": lambda self: self._parse_to_table(), 747 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 748 "TRANSFORM": lambda self: self.expression( 749 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 750 ), 751 "TTL": lambda self: self._parse_ttl(), 752 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 753 "VOLATILE": lambda self: self._parse_volatile_property(), 754 "WITH": lambda self: self._parse_with_property(), 755 } 756 757 CONSTRAINT_PARSERS = { 758 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 759 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 760 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 761 "CHARACTER SET": lambda self: self.expression( 762 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 763 ), 764 "CHECK": lambda self: self.expression( 765 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 766 ), 767 "COLLATE": lambda self: self.expression( 768 exp.CollateColumnConstraint, this=self._parse_var() 769 ), 770 "COMMENT": lambda self: self.expression( 771 exp.CommentColumnConstraint, this=self._parse_string() 772 ), 773 "COMPRESS": lambda self: self._parse_compress(), 774 "CLUSTERED": lambda self: self.expression( 775 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 776 ), 777 "NONCLUSTERED": lambda self: self.expression( 778 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 779 ), 780 "DEFAULT": lambda self: self.expression( 781 exp.DefaultColumnConstraint, this=self._parse_bitwise() 782 ), 783 "ENCODE": lambda self: 
self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 784 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 785 "FORMAT": lambda self: self.expression( 786 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 787 ), 788 "GENERATED": lambda self: self._parse_generated_as_identity(), 789 "IDENTITY": lambda self: self._parse_auto_increment(), 790 "INLINE": lambda self: self._parse_inline(), 791 "LIKE": lambda self: self._parse_create_like(), 792 "NOT": lambda self: self._parse_not_constraint(), 793 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 794 "ON": lambda self: ( 795 self._match(TokenType.UPDATE) 796 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 797 ) 798 or self.expression(exp.OnProperty, this=self._parse_id_var()), 799 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 800 "PERIOD": lambda self: self._parse_period_for_system_time(), 801 "PRIMARY KEY": lambda self: self._parse_primary_key(), 802 "REFERENCES": lambda self: self._parse_references(match=False), 803 "TITLE": lambda self: self.expression( 804 exp.TitleColumnConstraint, this=self._parse_var_or_string() 805 ), 806 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 807 "UNIQUE": lambda self: self._parse_unique(), 808 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 809 "WITH": lambda self: self.expression( 810 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 811 ), 812 } 813 814 ALTER_PARSERS = { 815 "ADD": lambda self: self._parse_alter_table_add(), 816 "ALTER": lambda self: self._parse_alter_table_alter(), 817 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 818 "DROP": lambda self: self._parse_alter_table_drop(), 819 "RENAME": lambda self: self._parse_alter_table_rename(), 820 } 821 822 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", 
"LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 823 824 NO_PAREN_FUNCTION_PARSERS = { 825 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 826 "CASE": lambda self: self._parse_case(), 827 "IF": lambda self: self._parse_if(), 828 "NEXT": lambda self: self._parse_next_value_for(), 829 } 830 831 INVALID_FUNC_NAME_TOKENS = { 832 TokenType.IDENTIFIER, 833 TokenType.STRING, 834 } 835 836 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 837 838 FUNCTION_PARSERS = { 839 "ANY_VALUE": lambda self: self._parse_any_value(), 840 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 841 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 842 "DECODE": lambda self: self._parse_decode(), 843 "EXTRACT": lambda self: self._parse_extract(), 844 "JSON_OBJECT": lambda self: self._parse_json_object(), 845 "JSON_TABLE": lambda self: self._parse_json_table(), 846 "MATCH": lambda self: self._parse_match_against(), 847 "OPENJSON": lambda self: self._parse_open_json(), 848 "POSITION": lambda self: self._parse_position(), 849 "PREDICT": lambda self: self._parse_predict(), 850 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 851 "STRING_AGG": lambda self: self._parse_string_agg(), 852 "SUBSTRING": lambda self: self._parse_substring(), 853 "TRIM": lambda self: self._parse_trim(), 854 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 855 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 856 } 857 858 QUERY_MODIFIER_PARSERS = { 859 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 860 TokenType.WHERE: lambda self: ("where", self._parse_where()), 861 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 862 TokenType.HAVING: lambda self: ("having", self._parse_having()), 863 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 864 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 865 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 866 
TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 867 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 868 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 869 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 870 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 871 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 872 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 873 TokenType.CLUSTER_BY: lambda self: ( 874 "cluster", 875 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 876 ), 877 TokenType.DISTRIBUTE_BY: lambda self: ( 878 "distribute", 879 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 880 ), 881 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 882 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 883 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 884 } 885 886 SET_PARSERS = { 887 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 888 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 889 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 890 "TRANSACTION": lambda self: self._parse_set_transaction(), 891 } 892 893 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 894 895 TYPE_LITERAL_PARSERS = { 896 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 897 } 898 899 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 900 901 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 902 903 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 904 905 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 906 TRANSACTION_CHARACTERISTICS = { 907 "ISOLATION LEVEL REPEATABLE READ", 908 "ISOLATION LEVEL READ COMMITTED", 909 "ISOLATION LEVEL READ UNCOMMITTED", 910 "ISOLATION LEVEL 
SERIALIZABLE", 911 "READ WRITE", 912 "READ ONLY", 913 } 914 915 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 916 917 CLONE_KEYWORDS = {"CLONE", "COPY"} 918 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 919 920 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 921 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 922 923 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 924 925 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 926 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 927 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 928 929 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 930 931 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 932 933 DISTINCT_TOKENS = {TokenType.DISTINCT} 934 935 NULL_TOKENS = {TokenType.NULL} 936 937 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 938 939 STRICT_CAST = True 940 941 PREFIXED_PIVOT_COLUMNS = False 942 IDENTIFY_PIVOT_STRINGS = False 943 944 LOG_DEFAULTS_TO_LN = False 945 946 # Whether or not ADD is present for each column added by ALTER TABLE 947 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 948 949 # Whether or not the table sample clause expects CSV syntax 950 TABLESAMPLE_CSV = False 951 952 # Whether or not the SET command needs a delimiter (e.g. 
"=") for assignments 953 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 954 955 # Whether the TRIM function expects the characters to trim as its first argument 956 TRIM_PATTERN_FIRST = False 957 958 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 959 MODIFIERS_ATTACHED_TO_UNION = True 960 UNION_MODIFIERS = {"order", "limit", "offset"} 961 962 __slots__ = ( 963 "error_level", 964 "error_message_context", 965 "max_errors", 966 "dialect", 967 "sql", 968 "errors", 969 "_tokens", 970 "_index", 971 "_curr", 972 "_next", 973 "_prev", 974 "_prev_comments", 975 ) 976 977 # Autofilled 978 SHOW_TRIE: t.Dict = {} 979 SET_TRIE: t.Dict = {} 980 981 def __init__( 982 self, 983 error_level: t.Optional[ErrorLevel] = None, 984 error_message_context: int = 100, 985 max_errors: int = 3, 986 dialect: DialectType = None, 987 ): 988 from sqlglot.dialects import Dialect 989 990 self.error_level = error_level or ErrorLevel.IMMEDIATE 991 self.error_message_context = error_message_context 992 self.max_errors = max_errors 993 self.dialect = Dialect.get_or_raise(dialect) 994 self.reset() 995 996 def reset(self): 997 self.sql = "" 998 self.errors = [] 999 self._tokens = [] 1000 self._index = 0 1001 self._curr = None 1002 self._next = None 1003 self._prev = None 1004 self._prev_comments = None 1005 1006 def parse( 1007 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1008 ) -> t.List[t.Optional[exp.Expression]]: 1009 """ 1010 Parses a list of tokens and returns a list of syntax trees, one tree 1011 per parsed SQL statement. 1012 1013 Args: 1014 raw_tokens: The list of tokens. 1015 sql: The original SQL string, used to produce helpful debug messages. 1016 1017 Returns: 1018 The list of the produced syntax trees. 
1019 """ 1020 return self._parse( 1021 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1022 ) 1023 1024 def parse_into( 1025 self, 1026 expression_types: exp.IntoType, 1027 raw_tokens: t.List[Token], 1028 sql: t.Optional[str] = None, 1029 ) -> t.List[t.Optional[exp.Expression]]: 1030 """ 1031 Parses a list of tokens into a given Expression type. If a collection of Expression 1032 types is given instead, this method will try to parse the token list into each one 1033 of them, stopping at the first for which the parsing succeeds. 1034 1035 Args: 1036 expression_types: The expression type(s) to try and parse the token list into. 1037 raw_tokens: The list of tokens. 1038 sql: The original SQL string, used to produce helpful debug messages. 1039 1040 Returns: 1041 The target Expression. 1042 """ 1043 errors = [] 1044 for expression_type in ensure_list(expression_types): 1045 parser = self.EXPRESSION_PARSERS.get(expression_type) 1046 if not parser: 1047 raise TypeError(f"No parser registered for {expression_type}") 1048 1049 try: 1050 return self._parse(parser, raw_tokens, sql) 1051 except ParseError as e: 1052 e.errors[0]["into_expression"] = expression_type 1053 errors.append(e) 1054 1055 raise ParseError( 1056 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1057 errors=merge_errors(errors), 1058 ) from errors[-1] 1059 1060 def _parse( 1061 self, 1062 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1063 raw_tokens: t.List[Token], 1064 sql: t.Optional[str] = None, 1065 ) -> t.List[t.Optional[exp.Expression]]: 1066 self.reset() 1067 self.sql = sql or "" 1068 1069 total = len(raw_tokens) 1070 chunks: t.List[t.List[Token]] = [[]] 1071 1072 for i, token in enumerate(raw_tokens): 1073 if token.token_type == TokenType.SEMICOLON: 1074 if i < total - 1: 1075 chunks.append([]) 1076 else: 1077 chunks[-1].append(token) 1078 1079 expressions = [] 1080 1081 for tokens in chunks: 1082 self._index = -1 1083 self._tokens = 
tokens 1084 self._advance() 1085 1086 expressions.append(parse_method(self)) 1087 1088 if self._index < len(self._tokens): 1089 self.raise_error("Invalid expression / Unexpected token") 1090 1091 self.check_errors() 1092 1093 return expressions 1094 1095 def check_errors(self) -> None: 1096 """Logs or raises any found errors, depending on the chosen error level setting.""" 1097 if self.error_level == ErrorLevel.WARN: 1098 for error in self.errors: 1099 logger.error(str(error)) 1100 elif self.error_level == ErrorLevel.RAISE and self.errors: 1101 raise ParseError( 1102 concat_messages(self.errors, self.max_errors), 1103 errors=merge_errors(self.errors), 1104 ) 1105 1106 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1107 """ 1108 Appends an error in the list of recorded errors or raises it, depending on the chosen 1109 error level setting. 1110 """ 1111 token = token or self._curr or self._prev or Token.string("") 1112 start = token.start 1113 end = token.end + 1 1114 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1115 highlight = self.sql[start:end] 1116 end_context = self.sql[end : end + self.error_message_context] 1117 1118 error = ParseError.new( 1119 f"{message}. Line {token.line}, Col: {token.col}.\n" 1120 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1121 description=message, 1122 line=token.line, 1123 col=token.col, 1124 start_context=start_context, 1125 highlight=highlight, 1126 end_context=end_context, 1127 ) 1128 1129 if self.error_level == ErrorLevel.IMMEDIATE: 1130 raise error 1131 1132 self.errors.append(error) 1133 1134 def expression( 1135 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1136 ) -> E: 1137 """ 1138 Creates a new, validated Expression. 1139 1140 Args: 1141 exp_class: The expression class to instantiate. 1142 comments: An optional list of comments to attach to the expression. 
1143 kwargs: The arguments to set for the expression along with their respective values. 1144 1145 Returns: 1146 The target expression. 1147 """ 1148 instance = exp_class(**kwargs) 1149 instance.add_comments(comments) if comments else self._add_comments(instance) 1150 return self.validate_expression(instance) 1151 1152 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1153 if expression and self._prev_comments: 1154 expression.add_comments(self._prev_comments) 1155 self._prev_comments = None 1156 1157 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1158 """ 1159 Validates an Expression, making sure that all its mandatory arguments are set. 1160 1161 Args: 1162 expression: The expression to validate. 1163 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1164 1165 Returns: 1166 The validated expression. 1167 """ 1168 if self.error_level != ErrorLevel.IGNORE: 1169 for error_message in expression.error_messages(args): 1170 self.raise_error(error_message) 1171 1172 return expression 1173 1174 def _find_sql(self, start: Token, end: Token) -> str: 1175 return self.sql[start.start : end.end + 1] 1176 1177 def _is_connected(self) -> bool: 1178 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1179 1180 def _advance(self, times: int = 1) -> None: 1181 self._index += times 1182 self._curr = seq_get(self._tokens, self._index) 1183 self._next = seq_get(self._tokens, self._index + 1) 1184 1185 if self._index > 0: 1186 self._prev = self._tokens[self._index - 1] 1187 self._prev_comments = self._prev.comments 1188 else: 1189 self._prev = None 1190 self._prev_comments = None 1191 1192 def _retreat(self, index: int) -> None: 1193 if index != self._index: 1194 self._advance(index - self._index) 1195 1196 def _parse_command(self) -> exp.Command: 1197 return self.expression( 1198 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1199 ) 

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT ON <kind> <object> IS <string>, or falls back to a Command."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses a (possibly qualified) table name into a ToTableProperty."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause, including per-expression actions."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL expression, optionally followed by DELETE / RECOMPRESS / TO DISK / TO VOLUME
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement: a registered statement/command parser, or a query."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses a DROP statement, or falls back to a Command for unknown kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches an optional IF [NOT] EXISTS clause; truthy iff fully matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE statement for functions, indexes, tables, views etc."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # e.g. CREATE TABLE FUNCTION: skip the TABLE token and treat as FUNCTION
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions into one node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a property that appears before the object name (pre-name position)."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # NOTE: the order of these matches is significant — each one consumes tokens
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifier flags that actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single property: registered parser, special cases, or key = value."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Fall back to a generic key = value property; undo if there's no "="
        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses a STORED AS clause, including Hive INPUTFORMAT/OUTPUTFORMAT pairs."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parses [=|AS] <field> into the given property expression class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parses consecutive properties into a Properties node; None if there are none."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parses a [NO] FALLBACK [PROTECTION] property."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguates VOLATILE: a table property right after CREATE vs a UDF stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        """Parses SYSTEM_VERSIONING = ON [(HISTORY_TABLE = ..., DATA_CONSISTENCY_CHECK = ...)]."""
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parses the clause following WITH: a wrapped property list or a Teradata property."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parses a DEFINER = user@host clause."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parses WITH JOURNAL [TABLE] [=] <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parses a [NO] LOG property."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Wraps the pre-parsed journal modifier flags into a JournalProperty."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses CHECKSUM [=] ON|OFF|DEFAULT."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        """Parses a CLUSTER BY list of ordered expressions."""
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parses CLUSTERED BY (cols) [SORTED BY (...)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parses COPY GRANTS; backtracks one token if GRANTS doesn't follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parses FREESPACE [=] <number> [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parses MERGEBLOCKRATIO, either with an explicit value or as NO/DEFAULT variants."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parses DATABLOCKSIZE [=] <size> [BYTES|KBYTES|KILOBYTES]."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parses BLOCKCOMPRESSION [=] ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP (...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parses WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parses a LOCKING clause: target kind/name, FOR|IN, lock type and OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parses a PARTITION BY expression list; empty list if absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parses a partition bound: IN (...), FROM (...) TO (...), or WITH (MODULUS n, REMAINDER m)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords here, not ordinary expressions
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parses PARTITION OF <parent> { DEFAULT | FOR VALUES <bound spec> }."""
        if not self._match_text_seq("OF"):
            # Not a PARTITION OF clause: un-consume the previously matched token
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parses PARTITIONED BY [=] <schema or bracketed field>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parses WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """Parses the continuation of NO: currently only NO PRIMARY INDEX."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None
1859 1860 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1861 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1862 return exp.OnCommitProperty() 1863 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1864 return exp.OnCommitProperty(delete=True) 1865 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1866 1867 def _parse_distkey(self) -> exp.DistKeyProperty: 1868 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1869 1870 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1871 table = self._parse_table(schema=True) 1872 1873 options = [] 1874 while self._match_texts(("INCLUDING", "EXCLUDING")): 1875 this = self._prev.text.upper() 1876 1877 id_var = self._parse_id_var() 1878 if not id_var: 1879 return None 1880 1881 options.append( 1882 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1883 ) 1884 1885 return self.expression(exp.LikeProperty, this=table, expressions=options) 1886 1887 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1888 return self.expression( 1889 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1890 ) 1891 1892 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1893 self._match(TokenType.EQ) 1894 return self.expression( 1895 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1896 ) 1897 1898 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1899 self._match_text_seq("WITH", "CONNECTION") 1900 return self.expression( 1901 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1902 ) 1903 1904 def _parse_returns(self) -> exp.ReturnsProperty: 1905 value: t.Optional[exp.Expression] 1906 is_table = self._match(TokenType.TABLE) 1907 1908 if is_table: 1909 if self._match(TokenType.LT): 1910 value = self.expression( 1911 exp.Schema, 1912 this="TABLE", 1913 
expressions=self._parse_csv(self._parse_struct_types), 1914 ) 1915 if not self._match(TokenType.GT): 1916 self.raise_error("Expecting >") 1917 else: 1918 value = self._parse_schema(exp.var("TABLE")) 1919 else: 1920 value = self._parse_types() 1921 1922 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1923 1924 def _parse_describe(self) -> exp.Describe: 1925 kind = self._match_set(self.CREATABLES) and self._prev.text 1926 this = self._parse_table(schema=True) 1927 properties = self._parse_properties() 1928 expressions = properties.expressions if properties else None 1929 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1930 1931 def _parse_insert(self) -> exp.Insert: 1932 comments = ensure_list(self._prev_comments) 1933 overwrite = self._match(TokenType.OVERWRITE) 1934 ignore = self._match(TokenType.IGNORE) 1935 local = self._match_text_seq("LOCAL") 1936 alternative = None 1937 1938 if self._match_text_seq("DIRECTORY"): 1939 this: t.Optional[exp.Expression] = self.expression( 1940 exp.Directory, 1941 this=self._parse_var_or_string(), 1942 local=local, 1943 row_format=self._parse_row_format(match_row=True), 1944 ) 1945 else: 1946 if self._match(TokenType.OR): 1947 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1948 1949 self._match(TokenType.INTO) 1950 comments += ensure_list(self._prev_comments) 1951 self._match(TokenType.TABLE) 1952 this = self._parse_table(schema=True) 1953 1954 returning = self._parse_returning() 1955 1956 return self.expression( 1957 exp.Insert, 1958 comments=comments, 1959 this=this, 1960 by_name=self._match_text_seq("BY", "NAME"), 1961 exists=self._parse_exists(), 1962 partition=self._parse_partition(), 1963 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1964 and self._parse_conjunction(), 1965 expression=self._parse_ddl_select(), 1966 conflict=self._parse_on_conflict(), 1967 returning=returning or self._parse_returning(), 1968 
    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (Postgres) or ON DUPLICATE KEY (MySQL) clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a ROW FORMAT clause when the ROW token was already consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive ROW FORMAT SERDE/DELIMITED clause.

        Args:
            match_row: when True, require (and consume) the leading ROW FORMAT tokens.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive's LOAD DATA statement; fall back to a raw Command otherwise."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table> (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS(...)] [AS <select>] (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<exprs>)."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse a single VALUES row: either (a, b, ...) or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (overridable hook for dialects)."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH, SELECT, (subquery), VALUES, or a leading FROM."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                # raise_error may not actually raise (depends on the error level),
                # so fall back to returning the bare CTE.
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # BigQuery: SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH [RECURSIVE] clause containing one or more CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH after the comma separator.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: <alias> AS (<statement>)."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(<columns>)] following a table-like expression."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If no columns were parsed, the paren wasn't a column list; rewind.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in a Subquery node, attaching pivots and an optional alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )
alias=self._parse_table_alias() if parse_alias else None, 2351 ) 2352 2353 def _parse_query_modifiers( 2354 self, this: t.Optional[exp.Expression] 2355 ) -> t.Optional[exp.Expression]: 2356 if isinstance(this, self.MODIFIABLES): 2357 for join in iter(self._parse_join, None): 2358 this.append("joins", join) 2359 for lateral in iter(self._parse_lateral, None): 2360 this.append("laterals", lateral) 2361 2362 while True: 2363 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2364 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2365 key, expression = parser(self) 2366 2367 if expression: 2368 this.set(key, expression) 2369 if key == "limit": 2370 offset = expression.args.pop("offset", None) 2371 if offset: 2372 this.set("offset", exp.Offset(expression=offset)) 2373 continue 2374 break 2375 return this 2376 2377 def _parse_hint(self) -> t.Optional[exp.Hint]: 2378 if self._match(TokenType.HINT): 2379 hints = [] 2380 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2381 hints.extend(hint) 2382 2383 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2384 self.raise_error("Expected */ after HINT") 2385 2386 return self.expression(exp.Hint, expressions=hints) 2387 2388 return None 2389 2390 def _parse_into(self) -> t.Optional[exp.Into]: 2391 if not self._match(TokenType.INTO): 2392 return None 2393 2394 temp = self._match(TokenType.TEMPORARY) 2395 unlogged = self._match_text_seq("UNLOGGED") 2396 self._match(TokenType.TABLE) 2397 2398 return self.expression( 2399 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2400 ) 2401 2402 def _parse_from( 2403 self, joins: bool = False, skip_from_token: bool = False 2404 ) -> t.Optional[exp.From]: 2405 if not skip_from_token and not self._match(TokenType.FROM): 2406 return None 2407 2408 return self.expression( 2409 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2410 ) 2411 2412 def _parse_match_recognize(self) -> 
t.Optional[exp.MatchRecognize]: 2413 if not self._match(TokenType.MATCH_RECOGNIZE): 2414 return None 2415 2416 self._match_l_paren() 2417 2418 partition = self._parse_partition_by() 2419 order = self._parse_order() 2420 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2421 2422 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2423 rows = exp.var("ONE ROW PER MATCH") 2424 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2425 text = "ALL ROWS PER MATCH" 2426 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2427 text += f" SHOW EMPTY MATCHES" 2428 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2429 text += f" OMIT EMPTY MATCHES" 2430 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2431 text += f" WITH UNMATCHED ROWS" 2432 rows = exp.var(text) 2433 else: 2434 rows = None 2435 2436 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2437 text = "AFTER MATCH SKIP" 2438 if self._match_text_seq("PAST", "LAST", "ROW"): 2439 text += f" PAST LAST ROW" 2440 elif self._match_text_seq("TO", "NEXT", "ROW"): 2441 text += f" TO NEXT ROW" 2442 elif self._match_text_seq("TO", "FIRST"): 2443 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2444 elif self._match_text_seq("TO", "LAST"): 2445 text += f" TO LAST {self._advance_any().text}" # type: ignore 2446 after = exp.var(text) 2447 else: 2448 after = None 2449 2450 if self._match_text_seq("PATTERN"): 2451 self._match_l_paren() 2452 2453 if not self._curr: 2454 self.raise_error("Expecting )", self._curr) 2455 2456 paren = 1 2457 start = self._curr 2458 2459 while self._curr and paren > 0: 2460 if self._curr.token_type == TokenType.L_PAREN: 2461 paren += 1 2462 if self._curr.token_type == TokenType.R_PAREN: 2463 paren -= 1 2464 2465 end = self._prev 2466 self._advance() 2467 2468 if paren > 0: 2469 self.raise_error("Expecting )", self._curr) 2470 2471 pattern = exp.var(self._find_sql(start, end)) 2472 else: 2473 pattern = None 2474 2475 define = ( 2476 
self._parse_csv(self._parse_name_as_expression) 2477 if self._match_text_seq("DEFINE") 2478 else None 2479 ) 2480 2481 self._match_r_paren() 2482 2483 return self.expression( 2484 exp.MatchRecognize, 2485 partition_by=partition, 2486 order=order, 2487 measures=measures, 2488 rows=rows, 2489 after=after, 2490 pattern=pattern, 2491 define=define, 2492 alias=self._parse_table_alias(), 2493 ) 2494 2495 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2496 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2497 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2498 cross_apply = False 2499 2500 if cross_apply is not None: 2501 this = self._parse_select(table=True) 2502 view = None 2503 outer = None 2504 elif self._match(TokenType.LATERAL): 2505 this = self._parse_select(table=True) 2506 view = self._match(TokenType.VIEW) 2507 outer = self._match(TokenType.OUTER) 2508 else: 2509 return None 2510 2511 if not this: 2512 this = ( 2513 self._parse_unnest() 2514 or self._parse_function() 2515 or self._parse_id_var(any_token=False) 2516 ) 2517 2518 while self._match(TokenType.DOT): 2519 this = exp.Dot( 2520 this=this, 2521 expression=self._parse_function() or self._parse_id_var(any_token=False), 2522 ) 2523 2524 if view: 2525 table = self._parse_id_var(any_token=False) 2526 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2527 table_alias: t.Optional[exp.TableAlias] = self.expression( 2528 exp.TableAlias, this=table, columns=columns 2529 ) 2530 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2531 # We move the alias from the lateral's child node to the lateral itself 2532 table_alias = this.args["alias"].pop() 2533 else: 2534 table_alias = self._parse_table_alias() 2535 2536 return self.expression( 2537 exp.Lateral, 2538 this=this, 2539 view=view, 2540 outer=outer, 2541 alias=table_alias, 2542 cross_apply=cross_apply, 2543 ) 2544 2545 def _parse_join_parts( 2546 self, 2547 ) -> 
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a JOIN clause (including comma joins and CROSS/OUTER APPLY)."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join; rewind past any method/side/kind we consumed.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Support nested joins like "a JOIN b JOIN c ON ..." by attaching a
            # trailing join to the right-hand table when it has its own condition.
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an index expression with an optional Postgres operator class."""
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse a CREATE INDEX body; when `index` is given, only the ON <table> part."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL USE/FORCE/IGNORE INDEX hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name: [catalog.][db.]table[.more...]."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema) or ""

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like factor: lateral, unnest, values, subquery, or a plain table."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects put the TABLESAMPLE before the alias; parse it early there.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal-table clause: FOR TIMESTAMP/VERSION AS OF, BETWEEN, CONTAINED IN, ALL."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) with optional alias and WITH ORDINALITY/OFFSET."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                # In column-only dialects the alias names the column, not the table.
                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The last column alias actually names the ordinality column.
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES list, optionally parenthesized as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2855 if not is_derived and not self._match(TokenType.VALUES): 2856 return None 2857 2858 expressions = self._parse_csv(self._parse_value) 2859 alias = self._parse_table_alias() 2860 2861 if is_derived: 2862 self._match_r_paren() 2863 2864 return self.expression( 2865 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2866 ) 2867 2868 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2869 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2870 as_modifier and self._match_text_seq("USING", "SAMPLE") 2871 ): 2872 return None 2873 2874 bucket_numerator = None 2875 bucket_denominator = None 2876 bucket_field = None 2877 percent = None 2878 size = None 2879 seed = None 2880 2881 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 2882 matched_l_paren = self._match(TokenType.L_PAREN) 2883 2884 if self.TABLESAMPLE_CSV: 2885 num = None 2886 expressions = self._parse_csv(self._parse_primary) 2887 else: 2888 expressions = None 2889 num = ( 2890 self._parse_factor() 2891 if self._match(TokenType.NUMBER, advance=False) 2892 else self._parse_primary() or self._parse_placeholder() 2893 ) 2894 2895 if self._match_text_seq("BUCKET"): 2896 bucket_numerator = self._parse_number() 2897 self._match_text_seq("OUT", "OF") 2898 bucket_denominator = bucket_denominator = self._parse_number() 2899 self._match(TokenType.ON) 2900 bucket_field = self._parse_field() 2901 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2902 percent = num 2903 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 2904 size = num 2905 else: 2906 percent = num 2907 2908 if matched_l_paren: 2909 self._match_r_paren() 2910 2911 if self._match(TokenType.L_PAREN): 2912 method = self._parse_var(upper=True) 2913 seed = self._match(TokenType.COMMA) and self._parse_number() 2914 self._match_r_paren() 2915 elif self._match_texts(("SEED", "REPEATABLE")): 2916 
seed = self._parse_wrapped(self._parse_number) 2917 2918 return self.expression( 2919 exp.TableSample, 2920 expressions=expressions, 2921 method=method, 2922 bucket_numerator=bucket_numerator, 2923 bucket_denominator=bucket_denominator, 2924 bucket_field=bucket_field, 2925 percent=percent, 2926 size=size, 2927 seed=seed, 2928 ) 2929 2930 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2931 return list(iter(self._parse_pivot, None)) or None 2932 2933 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2934 return list(iter(self._parse_join, None)) or None 2935 2936 # https://duckdb.org/docs/sql/statements/pivot 2937 def _parse_simplified_pivot(self) -> exp.Pivot: 2938 def _parse_on() -> t.Optional[exp.Expression]: 2939 this = self._parse_bitwise() 2940 return self._parse_in(this) if self._match(TokenType.IN) else this 2941 2942 this = self._parse_table() 2943 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2944 using = self._match(TokenType.USING) and self._parse_csv( 2945 lambda: self._parse_alias(self._parse_function()) 2946 ) 2947 group = self._parse_group() 2948 return self.expression( 2949 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2950 ) 2951 2952 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2953 index = self._index 2954 include_nulls = None 2955 2956 if self._match(TokenType.PIVOT): 2957 unpivot = False 2958 elif self._match(TokenType.UNPIVOT): 2959 unpivot = True 2960 2961 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2962 if self._match_text_seq("INCLUDE", "NULLS"): 2963 include_nulls = True 2964 elif self._match_text_seq("EXCLUDE", "NULLS"): 2965 include_nulls = False 2966 else: 2967 return None 2968 2969 expressions = [] 2970 field = None 2971 2972 if not self._match(TokenType.L_PAREN): 2973 self._retreat(index) 2974 return None 2975 2976 if unpivot: 2977 expressions = self._parse_csv(self._parse_column) 2978 else: 2979 expressions = 
self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # Only consume a table alias if no further PIVOT/UNPIVOT clause follows
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Synthesize the output column names (one per IN value x aggregation)
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    # Dialect setting decides whether the aggregation alias is
                    # prefixed or suffixed to the pivoted value's name
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each PIVOT aggregation, used to name pivoted columns."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause, or return None if the next token is not WHERE."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including GROUPING SETS / ROLLUP / CUBE / WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        # Keep consuming grouping elements until none of the modifier forms match
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            # WITH ROLLUP / WITH CUBE take no argument list; bare ROLLUP(...) / CUBE(...) do
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # WITH matched but none of ROLLUP/CUBE/TOTALS followed: undo it
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...), or return None if the keyword is absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause, or return None if the next token is not HAVING."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause, or return None if the next token is not QUALIFY."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse a hierarchical CONNECT BY [START WITH ...] clause (Oracle-style)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # PRIOR is only meaningful inside CONNECT BY, so register a temporary
        # no-paren parser for it and remove it once the condition is parsed
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        # START WITH may also appear after CONNECT BY
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse a `name AS expression` pair (used e.g. by INTERPOLATE)."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an INTERPOLATE (...) list, or return None if the keyword is absent."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause; returns `this` unchanged if there is none."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a sort-like clause (e.g. SORT BY / CLUSTER BY) introduced by `token`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        """Parse one ordering term: expr [ASC|DESC] [NULLS FIRST|LAST] [WITH FILL ...]."""
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When the user didn't specify NULLS FIRST/LAST, derive the effective
        # ordering from the dialect's default null-ordering semantics
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top=True`) or a FETCH clause; returns `this` if neither."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP may optionally parenthesize its expression: TOP (n)
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style LIMIT offset, count
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            # ONLY and WITH TIES are mutually exclusive per standard FETCH syntax
            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged if there is none."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse zero or more locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is True for NOWAIT, an expression for WAIT <n>, False for SKIP LOCKED
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT operations onto `this`, left-associatively."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # Set operations default to DISTINCT unless ALL is given explicitly
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (e.g. ORDER BY/LIMIT) from the right
                # operand up to the UNION node itself
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-level operators (lowest precedence of the expression chain)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators (<, >, <=, >=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: [NOT] BETWEEN/IN/LIKE/..., ISNULL/NOTNULL, IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of IS [NOT] {DISTINCT FROM expr | NULL | TRUE | FALSE}."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all; rewind to before the IS token
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the tail of an IN predicate: UNNEST(...), (list|subquery), or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery is stored under "query", a value list under "expressions"
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse the tail of a BETWEEN predicate: low AND high."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, normalizing it to INTERVAL '<n>' <unit> form."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        # Bail out (e.g. `interval` used as an identifier, as in `interval IS NULL`)
        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True, upper=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level binary operators, ||, ??, and << / >> shift pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (TERM precedence)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators, tagging Div with dialect division semantics."""
        if self.EXPONENT:
            factor = self._parse_tokens(self._parse_exponent, self.FACTOR)
        else:
            factor = self._parse_tokens(self._parse_unary, self.FACTOR)
        if isinstance(factor, exp.Div):
            factor.args["typed"] = self.dialect.TYPED_DIVISION
            factor.args["safe"] = self.dialect.SAFE_DIVISION
        return factor

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation-level operators."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, else fall through to typed/AT TIME ZONE parsing."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an interval, a `TYPE 'literal'` style cast, or fall back to a column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2024-01-01' -> type-specific literal parser or a Cast
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by something else: reparse as a column
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one data-type parameter, e.g. the `25` in VARCHAR(25)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY<...>), struct, enum, UDT,
        timezone-qualified timestamps, INTERVAL spans, and trailing [] array suffixes."""
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # The type may appear as a plain identifier (e.g. a UDT or a dialect
            # keyword that wasn't tokenized as a type)
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(...) could also be a function call; flagged for the check below
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # e.g. INTERVAL YEAR TO MONTH
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal, so TYPE(...) was a function call, not a type
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Each trailing [] wraps the type in one more ARRAY level
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one struct field: `name type` or `name: type`."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix on `this`."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, wrapping bare identifiers in exp.Column."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators to `this`: ::type casts, dots, brackets, etc."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift name parts left: column -> table, table -> db, db -> catalog
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, adjacent-string concat, .N float,
        or a parenthesized expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> CONCAT)
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal like .5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: primary literal, function call, or identifier (in that order)."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, also unwrapping the ODBC-style {fn <function>} form."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call proper: no-paren functions, registered special-case
        parsers, subquery predicates, known functions, or an Anonymous fallback."""
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Skip past the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Some builders accept a `dialect` kwarg; pass it only when declared
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original (non-normalized) spelling of the name
                    func.meta["name"] = this

                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single parameter in a function definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); falls back to an Identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> expr or x -> expr); otherwise a DISTINCT list or
        a select/expression argument with optional ORDER BY / LIMIT."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized lambda parameter list after all
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow found: rewind and parse as an ordinary argument
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list), unless what follows
        is actually a subquery — in which case `this` is returned untouched."""
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # The speculative select parse must not leave errors or move the cursor
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse one field definition inside a schema list."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and trailing constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # `name AS expr` without a type: a computed column
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; retreats one token (past AUTO) if REFRESH is absent."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint, with either a wrapped list or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY(...) | ROW ... | expr}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if
self._match_text_seq("MINVALUE"): 4083 this.set("minvalue", self._parse_bitwise()) 4084 if self._match_text_seq("MAXVALUE"): 4085 this.set("maxvalue", self._parse_bitwise()) 4086 4087 if self._match_text_seq("CYCLE"): 4088 this.set("cycle", True) 4089 elif self._match_text_seq("NO", "CYCLE"): 4090 this.set("cycle", False) 4091 4092 if not identity: 4093 this.set("expression", self._parse_bitwise()) 4094 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4095 args = self._parse_csv(self._parse_bitwise) 4096 this.set("start", seq_get(args, 0)) 4097 this.set("increment", seq_get(args, 1)) 4098 4099 self._match_r_paren() 4100 4101 return this 4102 4103 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4104 self._match_text_seq("LENGTH") 4105 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4106 4107 def _parse_not_constraint( 4108 self, 4109 ) -> t.Optional[exp.Expression]: 4110 if self._match_text_seq("NULL"): 4111 return self.expression(exp.NotNullColumnConstraint) 4112 if self._match_text_seq("CASESPECIFIC"): 4113 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4114 if self._match_text_seq("FOR", "REPLICATION"): 4115 return self.expression(exp.NotForReplicationColumnConstraint) 4116 return None 4117 4118 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4119 if self._match(TokenType.CONSTRAINT): 4120 this = self._parse_id_var() 4121 else: 4122 this = None 4123 4124 if self._match_texts(self.CONSTRAINT_PARSERS): 4125 return self.expression( 4126 exp.ColumnConstraint, 4127 this=this, 4128 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4129 ) 4130 4131 return this 4132 4133 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4134 if not self._match(TokenType.CONSTRAINT): 4135 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4136 4137 this = self._parse_id_var() 4138 expressions = [] 4139 4140 while 
True: 4141 constraint = self._parse_unnamed_constraint() or self._parse_function() 4142 if not constraint: 4143 break 4144 expressions.append(constraint) 4145 4146 return self.expression(exp.Constraint, this=this, expressions=expressions) 4147 4148 def _parse_unnamed_constraint( 4149 self, constraints: t.Optional[t.Collection[str]] = None 4150 ) -> t.Optional[exp.Expression]: 4151 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4152 constraints or self.CONSTRAINT_PARSERS 4153 ): 4154 return None 4155 4156 constraint = self._prev.text.upper() 4157 if constraint not in self.CONSTRAINT_PARSERS: 4158 self.raise_error(f"No parser found for schema constraint {constraint}.") 4159 4160 return self.CONSTRAINT_PARSERS[constraint](self) 4161 4162 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4163 self._match_text_seq("KEY") 4164 return self.expression( 4165 exp.UniqueColumnConstraint, 4166 this=self._parse_schema(self._parse_id_var(any_token=False)), 4167 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4168 ) 4169 4170 def _parse_key_constraint_options(self) -> t.List[str]: 4171 options = [] 4172 while True: 4173 if not self._curr: 4174 break 4175 4176 if self._match(TokenType.ON): 4177 action = None 4178 on = self._advance_any() and self._prev.text 4179 4180 if self._match_text_seq("NO", "ACTION"): 4181 action = "NO ACTION" 4182 elif self._match_text_seq("CASCADE"): 4183 action = "CASCADE" 4184 elif self._match_text_seq("RESTRICT"): 4185 action = "RESTRICT" 4186 elif self._match_pair(TokenType.SET, TokenType.NULL): 4187 action = "SET NULL" 4188 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4189 action = "SET DEFAULT" 4190 else: 4191 self.raise_error("Invalid key constraint") 4192 4193 options.append(f"ON {on} {action}") 4194 elif self._match_text_seq("NOT", "ENFORCED"): 4195 options.append("NOT ENFORCED") 4196 elif self._match_text_seq("DEFERRABLE"): 4197 options.append("DEFERRABLE") 4198 
elif self._match_text_seq("INITIALLY", "DEFERRED"): 4199 options.append("INITIALLY DEFERRED") 4200 elif self._match_text_seq("NORELY"): 4201 options.append("NORELY") 4202 elif self._match_text_seq("MATCH", "FULL"): 4203 options.append("MATCH FULL") 4204 else: 4205 break 4206 4207 return options 4208 4209 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4210 if match and not self._match(TokenType.REFERENCES): 4211 return None 4212 4213 expressions = None 4214 this = self._parse_table(schema=True) 4215 options = self._parse_key_constraint_options() 4216 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4217 4218 def _parse_foreign_key(self) -> exp.ForeignKey: 4219 expressions = self._parse_wrapped_id_vars() 4220 reference = self._parse_references() 4221 options = {} 4222 4223 while self._match(TokenType.ON): 4224 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4225 self.raise_error("Expected DELETE or UPDATE") 4226 4227 kind = self._prev.text.lower() 4228 4229 if self._match_text_seq("NO", "ACTION"): 4230 action = "NO ACTION" 4231 elif self._match(TokenType.SET): 4232 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4233 action = "SET " + self._prev.text.upper() 4234 else: 4235 self._advance() 4236 action = self._prev.text.upper() 4237 4238 options[kind] = action 4239 4240 return self.expression( 4241 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4242 ) 4243 4244 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4245 return self._parse_field() 4246 4247 def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint: 4248 self._match(TokenType.TIMESTAMP_SNAPSHOT) 4249 4250 id_vars = self._parse_wrapped_id_vars() 4251 return self.expression( 4252 exp.PeriodForSystemTimeConstraint, 4253 this=seq_get(id_vars, 0), 4254 expression=seq_get(id_vars, 1), 4255 ) 4256 4257 def _parse_primary_key( 4258 self, 
wrapped_optional: bool = False, in_props: bool = False 4259 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4260 desc = ( 4261 self._match_set((TokenType.ASC, TokenType.DESC)) 4262 and self._prev.token_type == TokenType.DESC 4263 ) 4264 4265 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4266 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4267 4268 expressions = self._parse_wrapped_csv( 4269 self._parse_primary_key_part, optional=wrapped_optional 4270 ) 4271 options = self._parse_key_constraint_options() 4272 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4273 4274 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4275 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4276 4277 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4278 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4279 return this 4280 4281 bracket_kind = self._prev.token_type 4282 expressions = self._parse_csv( 4283 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4284 ) 4285 4286 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4287 self.raise_error("Expected ]") 4288 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4289 self.raise_error("Expected }") 4290 4291 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4292 if bracket_kind == TokenType.L_BRACE: 4293 this = self.expression(exp.Struct, expressions=expressions) 4294 elif not this or this.name.upper() == "ARRAY": 4295 this = self.expression(exp.Array, expressions=expressions) 4296 else: 4297 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4298 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4299 4300 self._add_comments(this) 4301 return self._parse_bracket(this) 4302 
4303 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4304 if self._match(TokenType.COLON): 4305 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4306 return this 4307 4308 def _parse_case(self) -> t.Optional[exp.Expression]: 4309 ifs = [] 4310 default = None 4311 4312 comments = self._prev_comments 4313 expression = self._parse_conjunction() 4314 4315 while self._match(TokenType.WHEN): 4316 this = self._parse_conjunction() 4317 self._match(TokenType.THEN) 4318 then = self._parse_conjunction() 4319 ifs.append(self.expression(exp.If, this=this, true=then)) 4320 4321 if self._match(TokenType.ELSE): 4322 default = self._parse_conjunction() 4323 4324 if not self._match(TokenType.END): 4325 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4326 default = exp.column("interval") 4327 else: 4328 self.raise_error("Expected END after CASE", self._prev) 4329 4330 return self._parse_window( 4331 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4332 ) 4333 4334 def _parse_if(self) -> t.Optional[exp.Expression]: 4335 if self._match(TokenType.L_PAREN): 4336 args = self._parse_csv(self._parse_conjunction) 4337 this = self.validate_expression(exp.If.from_arg_list(args), args) 4338 self._match_r_paren() 4339 else: 4340 index = self._index - 1 4341 condition = self._parse_conjunction() 4342 4343 if not condition: 4344 self._retreat(index) 4345 return None 4346 4347 self._match(TokenType.THEN) 4348 true = self._parse_conjunction() 4349 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4350 self._match(TokenType.END) 4351 this = self.expression(exp.If, this=condition, true=true, false=false) 4352 4353 return self._parse_window(this) 4354 4355 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4356 if not self._match_text_seq("VALUE", "FOR"): 4357 self._retreat(self._index - 1) 4358 return None 4359 4360 return 
self.expression( 4361 exp.NextValueFor, 4362 this=self._parse_column(), 4363 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4364 ) 4365 4366 def _parse_extract(self) -> exp.Extract: 4367 this = self._parse_function() or self._parse_var() or self._parse_type() 4368 4369 if self._match(TokenType.FROM): 4370 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4371 4372 if not self._match(TokenType.COMMA): 4373 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4374 4375 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4376 4377 def _parse_any_value(self) -> exp.AnyValue: 4378 this = self._parse_lambda() 4379 is_max = None 4380 having = None 4381 4382 if self._match(TokenType.HAVING): 4383 self._match_texts(("MAX", "MIN")) 4384 is_max = self._prev.text == "MAX" 4385 having = self._parse_column() 4386 4387 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4388 4389 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4390 this = self._parse_conjunction() 4391 4392 if not self._match(TokenType.ALIAS): 4393 if self._match(TokenType.COMMA): 4394 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4395 4396 self.raise_error("Expected AS after CAST") 4397 4398 fmt = None 4399 to = self._parse_types() 4400 4401 if self._match(TokenType.FORMAT): 4402 fmt_string = self._parse_string() 4403 fmt = self._parse_at_time_zone(fmt_string) 4404 4405 if not to: 4406 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4407 if to.this in exp.DataType.TEMPORAL_TYPES: 4408 this = self.expression( 4409 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4410 this=this, 4411 format=exp.Literal.string( 4412 format_time( 4413 fmt_string.this if fmt_string else "", 4414 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4415 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4416 
) 4417 ), 4418 ) 4419 4420 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4421 this.set("zone", fmt.args["zone"]) 4422 return this 4423 elif not to: 4424 self.raise_error("Expected TYPE after CAST") 4425 elif isinstance(to, exp.Identifier): 4426 to = exp.DataType.build(to.name, udt=True) 4427 elif to.this == exp.DataType.Type.CHAR: 4428 if self._match(TokenType.CHARACTER_SET): 4429 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4430 4431 return self.expression( 4432 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4433 ) 4434 4435 def _parse_string_agg(self) -> exp.Expression: 4436 if self._match(TokenType.DISTINCT): 4437 args: t.List[t.Optional[exp.Expression]] = [ 4438 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4439 ] 4440 if self._match(TokenType.COMMA): 4441 args.extend(self._parse_csv(self._parse_conjunction)) 4442 else: 4443 args = self._parse_csv(self._parse_conjunction) # type: ignore 4444 4445 index = self._index 4446 if not self._match(TokenType.R_PAREN) and args: 4447 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4448 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4449 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4450 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4451 4452 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4453 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4454 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
4455 if not self._match_text_seq("WITHIN", "GROUP"): 4456 self._retreat(index) 4457 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4458 4459 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4460 order = self._parse_order(this=seq_get(args, 0)) 4461 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4462 4463 def _parse_convert( 4464 self, strict: bool, safe: t.Optional[bool] = None 4465 ) -> t.Optional[exp.Expression]: 4466 this = self._parse_bitwise() 4467 4468 if self._match(TokenType.USING): 4469 to: t.Optional[exp.Expression] = self.expression( 4470 exp.CharacterSet, this=self._parse_var() 4471 ) 4472 elif self._match(TokenType.COMMA): 4473 to = self._parse_types() 4474 else: 4475 to = None 4476 4477 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4478 4479 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4480 """ 4481 There are generally two variants of the DECODE function: 4482 4483 - DECODE(bin, charset) 4484 - DECODE(expression, search, result [, search, result] ... [, default]) 4485 4486 The second variant will always be parsed into a CASE expression. Note that NULL 4487 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4488 instead of relying on pattern matching. 
4489 """ 4490 args = self._parse_csv(self._parse_conjunction) 4491 4492 if len(args) < 3: 4493 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4494 4495 expression, *expressions = args 4496 if not expression: 4497 return None 4498 4499 ifs = [] 4500 for search, result in zip(expressions[::2], expressions[1::2]): 4501 if not search or not result: 4502 return None 4503 4504 if isinstance(search, exp.Literal): 4505 ifs.append( 4506 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4507 ) 4508 elif isinstance(search, exp.Null): 4509 ifs.append( 4510 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4511 ) 4512 else: 4513 cond = exp.or_( 4514 exp.EQ(this=expression.copy(), expression=search), 4515 exp.and_( 4516 exp.Is(this=expression.copy(), expression=exp.Null()), 4517 exp.Is(this=search.copy(), expression=exp.Null()), 4518 copy=False, 4519 ), 4520 copy=False, 4521 ) 4522 ifs.append(exp.If(this=cond, true=result)) 4523 4524 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4525 4526 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4527 self._match_text_seq("KEY") 4528 key = self._parse_column() 4529 self._match_set((TokenType.COLON, TokenType.COMMA)) 4530 self._match_text_seq("VALUE") 4531 value = self._parse_bitwise() 4532 4533 if not key and not value: 4534 return None 4535 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4536 4537 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4538 if not this or not self._match_text_seq("FORMAT", "JSON"): 4539 return this 4540 4541 return self.expression(exp.FormatJson, this=this) 4542 4543 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4544 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4545 for value in values: 4546 if self._match_text_seq(value, "ON", on): 4547 return f"{value} ON {on}" 4548 4549 return None 4550 4551 def _parse_json_object(self) -> exp.JSONObject: 4552 star = self._parse_star() 4553 expressions = ( 4554 [star] 4555 if star 4556 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4557 ) 4558 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4559 4560 unique_keys = None 4561 if self._match_text_seq("WITH", "UNIQUE"): 4562 unique_keys = True 4563 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4564 unique_keys = False 4565 4566 self._match_text_seq("KEYS") 4567 4568 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4569 self._parse_type() 4570 ) 4571 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4572 4573 return self.expression( 4574 exp.JSONObject, 4575 expressions=expressions, 4576 null_handling=null_handling, 4577 unique_keys=unique_keys, 4578 return_type=return_type, 4579 encoding=encoding, 4580 ) 4581 4582 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4583 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4584 if not self._match_text_seq("NESTED"): 4585 this = self._parse_id_var() 4586 kind = self._parse_types(allow_identifiers=False) 4587 nested = None 4588 else: 4589 this = None 4590 kind = None 4591 nested = True 4592 4593 path = self._match_text_seq("PATH") and self._parse_string() 4594 nested_schema = nested and self._parse_json_schema() 4595 4596 return self.expression( 4597 exp.JSONColumnDef, 4598 this=this, 4599 kind=kind, 4600 path=path, 4601 nested_schema=nested_schema, 4602 ) 4603 4604 def _parse_json_schema(self) -> exp.JSONSchema: 4605 self._match_text_seq("COLUMNS") 4606 return self.expression( 4607 exp.JSONSchema, 4608 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4609 ) 4610 4611 def _parse_json_table(self) -> 
exp.JSONTable: 4612 this = self._parse_format_json(self._parse_bitwise()) 4613 path = self._match(TokenType.COMMA) and self._parse_string() 4614 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4615 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4616 schema = self._parse_json_schema() 4617 4618 return exp.JSONTable( 4619 this=this, 4620 schema=schema, 4621 path=path, 4622 error_handling=error_handling, 4623 empty_handling=empty_handling, 4624 ) 4625 4626 def _parse_match_against(self) -> exp.MatchAgainst: 4627 expressions = self._parse_csv(self._parse_column) 4628 4629 self._match_text_seq(")", "AGAINST", "(") 4630 4631 this = self._parse_string() 4632 4633 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4634 modifier = "IN NATURAL LANGUAGE MODE" 4635 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4636 modifier = f"{modifier} WITH QUERY EXPANSION" 4637 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4638 modifier = "IN BOOLEAN MODE" 4639 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4640 modifier = "WITH QUERY EXPANSION" 4641 else: 4642 modifier = None 4643 4644 return self.expression( 4645 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4646 ) 4647 4648 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4649 def _parse_open_json(self) -> exp.OpenJSON: 4650 this = self._parse_bitwise() 4651 path = self._match(TokenType.COMMA) and self._parse_string() 4652 4653 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4654 this = self._parse_field(any_token=True) 4655 kind = self._parse_types() 4656 path = self._parse_string() 4657 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4658 4659 return self.expression( 4660 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4661 ) 4662 4663 expressions = None 4664 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4665 
self._match_l_paren() 4666 expressions = self._parse_csv(_parse_open_json_column_def) 4667 4668 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4669 4670 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4671 args = self._parse_csv(self._parse_bitwise) 4672 4673 if self._match(TokenType.IN): 4674 return self.expression( 4675 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4676 ) 4677 4678 if haystack_first: 4679 haystack = seq_get(args, 0) 4680 needle = seq_get(args, 1) 4681 else: 4682 needle = seq_get(args, 0) 4683 haystack = seq_get(args, 1) 4684 4685 return self.expression( 4686 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4687 ) 4688 4689 def _parse_predict(self) -> exp.Predict: 4690 self._match_text_seq("MODEL") 4691 this = self._parse_table() 4692 4693 self._match(TokenType.COMMA) 4694 self._match_text_seq("TABLE") 4695 4696 return self.expression( 4697 exp.Predict, 4698 this=this, 4699 expression=self._parse_table(), 4700 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4701 ) 4702 4703 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4704 args = self._parse_csv(self._parse_table) 4705 return exp.JoinHint(this=func_name.upper(), expressions=args) 4706 4707 def _parse_substring(self) -> exp.Substring: 4708 # Postgres supports the form: substring(string [from int] [for int]) 4709 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4710 4711 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4712 4713 if self._match(TokenType.FROM): 4714 args.append(self._parse_bitwise()) 4715 if self._match(TokenType.FOR): 4716 args.append(self._parse_bitwise()) 4717 4718 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4719 4720 def _parse_trim(self) -> exp.Trim: 4721 # https://www.w3resource.com/sql/character-functions/trim.php 4722 # 
https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4723 4724 position = None 4725 collation = None 4726 expression = None 4727 4728 if self._match_texts(self.TRIM_TYPES): 4729 position = self._prev.text.upper() 4730 4731 this = self._parse_bitwise() 4732 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4733 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4734 expression = self._parse_bitwise() 4735 4736 if invert_order: 4737 this, expression = expression, this 4738 4739 if self._match(TokenType.COLLATE): 4740 collation = self._parse_bitwise() 4741 4742 return self.expression( 4743 exp.Trim, this=this, position=position, expression=expression, collation=collation 4744 ) 4745 4746 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4747 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4748 4749 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4750 return self._parse_window(self._parse_id_var(), alias=True) 4751 4752 def _parse_respect_or_ignore_nulls( 4753 self, this: t.Optional[exp.Expression] 4754 ) -> t.Optional[exp.Expression]: 4755 if self._match_text_seq("IGNORE", "NULLS"): 4756 return self.expression(exp.IgnoreNulls, this=this) 4757 if self._match_text_seq("RESPECT", "NULLS"): 4758 return self.expression(exp.RespectNulls, this=this) 4759 return this 4760 4761 def _parse_window( 4762 self, this: t.Optional[exp.Expression], alias: bool = False 4763 ) -> t.Optional[exp.Expression]: 4764 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4765 self._match(TokenType.WHERE) 4766 this = self.expression( 4767 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4768 ) 4769 self._match_r_paren() 4770 4771 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
4772 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4773 if self._match_text_seq("WITHIN", "GROUP"): 4774 order = self._parse_wrapped(self._parse_order) 4775 this = self.expression(exp.WithinGroup, this=this, expression=order) 4776 4777 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4778 # Some dialects choose to implement and some do not. 4779 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4780 4781 # There is some code above in _parse_lambda that handles 4782 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4783 4784 # The below changes handle 4785 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4786 4787 # Oracle allows both formats 4788 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4789 # and Snowflake chose to do the same for familiarity 4790 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4791 this = self._parse_respect_or_ignore_nulls(this) 4792 4793 # bigquery select from window x AS (partition by ...) 
4794 if alias: 4795 over = None 4796 self._match(TokenType.ALIAS) 4797 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4798 return this 4799 else: 4800 over = self._prev.text.upper() 4801 4802 if not self._match(TokenType.L_PAREN): 4803 return self.expression( 4804 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4805 ) 4806 4807 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4808 4809 first = self._match(TokenType.FIRST) 4810 if self._match_text_seq("LAST"): 4811 first = False 4812 4813 partition, order = self._parse_partition_and_order() 4814 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4815 4816 if kind: 4817 self._match(TokenType.BETWEEN) 4818 start = self._parse_window_spec() 4819 self._match(TokenType.AND) 4820 end = self._parse_window_spec() 4821 4822 spec = self.expression( 4823 exp.WindowSpec, 4824 kind=kind, 4825 start=start["value"], 4826 start_side=start["side"], 4827 end=end["value"], 4828 end_side=end["side"], 4829 ) 4830 else: 4831 spec = None 4832 4833 self._match_r_paren() 4834 4835 window = self.expression( 4836 exp.Window, 4837 this=this, 4838 partition_by=partition, 4839 order=order, 4840 spec=spec, 4841 alias=window_alias, 4842 over=over, 4843 first=first, 4844 ) 4845 4846 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4847 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4848 return self._parse_window(window, alias=alias) 4849 4850 return window 4851 4852 def _parse_partition_and_order( 4853 self, 4854 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4855 return self._parse_partition_by(), self._parse_order() 4856 4857 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4858 self._match(TokenType.BETWEEN) 4859 4860 return { 4861 "value": ( 4862 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4863 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4864 or self._parse_bitwise() 4865 ), 4866 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4867 } 4868 4869 def _parse_alias( 4870 self, this: t.Optional[exp.Expression], explicit: bool = False 4871 ) -> t.Optional[exp.Expression]: 4872 any_token = self._match(TokenType.ALIAS) 4873 comments = self._prev_comments 4874 4875 if explicit and not any_token: 4876 return this 4877 4878 if self._match(TokenType.L_PAREN): 4879 aliases = self.expression( 4880 exp.Aliases, 4881 comments=comments, 4882 this=this, 4883 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4884 ) 4885 self._match_r_paren(aliases) 4886 return aliases 4887 4888 alias = self._parse_id_var(any_token) 4889 4890 if alias: 4891 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 4892 4893 return this 4894 4895 def _parse_id_var( 4896 self, 4897 any_token: bool = True, 4898 tokens: t.Optional[t.Collection[TokenType]] = None, 4899 ) -> t.Optional[exp.Expression]: 4900 identifier = self._parse_identifier() 4901 4902 if identifier: 4903 return identifier 4904 4905 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4906 quoted = self._prev.token_type == TokenType.STRING 4907 return exp.Identifier(this=self._prev.text, quoted=quoted) 4908 4909 return None 4910 4911 def _parse_string(self) -> 
t.Optional[exp.Expression]: 4912 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 4913 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 4914 return self._parse_placeholder() 4915 4916 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4917 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4918 4919 def _parse_number(self) -> t.Optional[exp.Expression]: 4920 if self._match(TokenType.NUMBER): 4921 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4922 return self._parse_placeholder() 4923 4924 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4925 if self._match(TokenType.IDENTIFIER): 4926 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4927 return self._parse_placeholder() 4928 4929 def _parse_var( 4930 self, 4931 any_token: bool = False, 4932 tokens: t.Optional[t.Collection[TokenType]] = None, 4933 upper: bool = False, 4934 ) -> t.Optional[exp.Expression]: 4935 if ( 4936 (any_token and self._advance_any()) 4937 or self._match(TokenType.VAR) 4938 or (self._match_set(tokens) if tokens else False) 4939 ): 4940 return self.expression( 4941 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 4942 ) 4943 return self._parse_placeholder() 4944 4945 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 4946 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 4947 self._advance() 4948 return self._prev 4949 return None 4950 4951 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4952 return self._parse_var() or self._parse_string() 4953 4954 def _parse_null(self) -> t.Optional[exp.Expression]: 4955 if self._match_set(self.NULL_TOKENS): 4956 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4957 return self._parse_placeholder() 4958 4959 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4960 if self._match(TokenType.TRUE): 4961 return 
self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4962 if self._match(TokenType.FALSE): 4963 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4964 return self._parse_placeholder() 4965 4966 def _parse_star(self) -> t.Optional[exp.Expression]: 4967 if self._match(TokenType.STAR): 4968 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4969 return self._parse_placeholder() 4970 4971 def _parse_parameter(self) -> exp.Parameter: 4972 def _parse_parameter_part() -> t.Optional[exp.Expression]: 4973 return ( 4974 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 4975 ) 4976 4977 self._match(TokenType.L_BRACE) 4978 this = _parse_parameter_part() 4979 expression = self._match(TokenType.COLON) and _parse_parameter_part() 4980 self._match(TokenType.R_BRACE) 4981 4982 return self.expression(exp.Parameter, this=this, expression=expression) 4983 4984 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4985 if self._match_set(self.PLACEHOLDER_PARSERS): 4986 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4987 if placeholder: 4988 return placeholder 4989 self._advance(-1) 4990 return None 4991 4992 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4993 if not self._match(TokenType.EXCEPT): 4994 return None 4995 if self._match(TokenType.L_PAREN, advance=False): 4996 return self._parse_wrapped_csv(self._parse_column) 4997 4998 except_column = self._parse_column() 4999 return [except_column] if except_column else None 5000 5001 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5002 if not self._match(TokenType.REPLACE): 5003 return None 5004 if self._match(TokenType.L_PAREN, advance=False): 5005 return self._parse_wrapped_csv(self._parse_expression) 5006 5007 replace_expression = self._parse_expression() 5008 return [replace_expression] if replace_expression else None 5009 5010 def _parse_csv( 5011 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 
5012 ) -> t.List[exp.Expression]: 5013 parse_result = parse_method() 5014 items = [parse_result] if parse_result is not None else [] 5015 5016 while self._match(sep): 5017 self._add_comments(parse_result) 5018 parse_result = parse_method() 5019 if parse_result is not None: 5020 items.append(parse_result) 5021 5022 return items 5023 5024 def _parse_tokens( 5025 self, parse_method: t.Callable, expressions: t.Dict 5026 ) -> t.Optional[exp.Expression]: 5027 this = parse_method() 5028 5029 while self._match_set(expressions): 5030 this = self.expression( 5031 expressions[self._prev.token_type], 5032 this=this, 5033 comments=self._prev_comments, 5034 expression=parse_method(), 5035 ) 5036 5037 return this 5038 5039 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5040 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5041 5042 def _parse_wrapped_csv( 5043 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5044 ) -> t.List[exp.Expression]: 5045 return self._parse_wrapped( 5046 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5047 ) 5048 5049 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5050 wrapped = self._match(TokenType.L_PAREN) 5051 if not wrapped and not optional: 5052 self.raise_error("Expecting (") 5053 parse_result = parse_method() 5054 if wrapped: 5055 self._match_r_paren() 5056 return parse_result 5057 5058 def _parse_expressions(self) -> t.List[exp.Expression]: 5059 return self._parse_csv(self._parse_expression) 5060 5061 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5062 return self._parse_select() or self._parse_set_operations( 5063 self._parse_expression() if alias else self._parse_conjunction() 5064 ) 5065 5066 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5067 return self._parse_query_modifiers( 5068 
self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5069 ) 5070 5071 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5072 this = None 5073 if self._match_texts(self.TRANSACTION_KIND): 5074 this = self._prev.text 5075 5076 self._match_texts(("TRANSACTION", "WORK")) 5077 5078 modes = [] 5079 while True: 5080 mode = [] 5081 while self._match(TokenType.VAR): 5082 mode.append(self._prev.text) 5083 5084 if mode: 5085 modes.append(" ".join(mode)) 5086 if not self._match(TokenType.COMMA): 5087 break 5088 5089 return self.expression(exp.Transaction, this=this, modes=modes) 5090 5091 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5092 chain = None 5093 savepoint = None 5094 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5095 5096 self._match_texts(("TRANSACTION", "WORK")) 5097 5098 if self._match_text_seq("TO"): 5099 self._match_text_seq("SAVEPOINT") 5100 savepoint = self._parse_id_var() 5101 5102 if self._match(TokenType.AND): 5103 chain = not self._match_text_seq("NO") 5104 self._match_text_seq("CHAIN") 5105 5106 if is_rollback: 5107 return self.expression(exp.Rollback, savepoint=savepoint) 5108 5109 return self.expression(exp.Commit, chain=chain) 5110 5111 def _parse_refresh(self) -> exp.Refresh: 5112 self._match(TokenType.TABLE) 5113 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5114 5115 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5116 if not self._match_text_seq("ADD"): 5117 return None 5118 5119 self._match(TokenType.COLUMN) 5120 exists_column = self._parse_exists(not_=True) 5121 expression = self._parse_field_def() 5122 5123 if expression: 5124 expression.set("exists", exists_column) 5125 5126 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5127 if self._match_texts(("FIRST", "AFTER")): 5128 position = self._prev.text 5129 column_position = self.expression( 5130 exp.ColumnPosition, 
this=self._parse_column(), position=position 5131 ) 5132 expression.set("position", column_position) 5133 5134 return expression 5135 5136 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5137 drop = self._match(TokenType.DROP) and self._parse_drop() 5138 if drop and not isinstance(drop, exp.Command): 5139 drop.set("kind", drop.args.get("kind", "COLUMN")) 5140 return drop 5141 5142 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5143 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5144 return self.expression( 5145 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5146 ) 5147 5148 def _parse_add_constraint(self) -> exp.AddConstraint: 5149 this = None 5150 kind = self._prev.token_type 5151 5152 if kind == TokenType.CONSTRAINT: 5153 this = self._parse_id_var() 5154 5155 if self._match_text_seq("CHECK"): 5156 expression = self._parse_wrapped(self._parse_conjunction) 5157 enforced = self._match_text_seq("ENFORCED") 5158 5159 return self.expression( 5160 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5161 ) 5162 5163 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5164 expression = self._parse_foreign_key() 5165 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5166 expression = self._parse_primary_key() 5167 else: 5168 expression = None 5169 5170 return self.expression(exp.AddConstraint, this=this, expression=expression) 5171 5172 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5173 index = self._index - 1 5174 5175 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5176 return self._parse_csv(self._parse_add_constraint) 5177 5178 self._retreat(index) 5179 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5180 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5181 return 
self._parse_wrapped_csv(self._parse_add_column, optional=True) 5182 5183 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5184 self._match(TokenType.COLUMN) 5185 column = self._parse_field(any_token=True) 5186 5187 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5188 return self.expression(exp.AlterColumn, this=column, drop=True) 5189 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5190 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5191 5192 self._match_text_seq("SET", "DATA") 5193 return self.expression( 5194 exp.AlterColumn, 5195 this=column, 5196 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5197 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5198 using=self._match(TokenType.USING) and self._parse_conjunction(), 5199 ) 5200 5201 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5202 index = self._index - 1 5203 5204 partition_exists = self._parse_exists() 5205 if self._match(TokenType.PARTITION, advance=False): 5206 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5207 5208 self._retreat(index) 5209 return self._parse_csv(self._parse_drop_column) 5210 5211 def _parse_alter_table_rename(self) -> exp.RenameTable: 5212 self._match_text_seq("TO") 5213 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5214 5215 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5216 start = self._prev 5217 5218 if not self._match(TokenType.TABLE): 5219 return self._parse_as_command(start) 5220 5221 exists = self._parse_exists() 5222 only = self._match_text_seq("ONLY") 5223 this = self._parse_table(schema=True) 5224 5225 if self._next: 5226 self._advance() 5227 5228 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5229 if parser: 5230 actions = ensure_list(parser(self)) 5231 5232 if not self._curr: 5233 return self.expression( 5234 exp.AlterTable, 5235 this=this, 5236 exists=exists, 
5237 actions=actions, 5238 only=only, 5239 ) 5240 5241 return self._parse_as_command(start) 5242 5243 def _parse_merge(self) -> exp.Merge: 5244 self._match(TokenType.INTO) 5245 target = self._parse_table() 5246 5247 if target and self._match(TokenType.ALIAS, advance=False): 5248 target.set("alias", self._parse_table_alias()) 5249 5250 self._match(TokenType.USING) 5251 using = self._parse_table() 5252 5253 self._match(TokenType.ON) 5254 on = self._parse_conjunction() 5255 5256 return self.expression( 5257 exp.Merge, 5258 this=target, 5259 using=using, 5260 on=on, 5261 expressions=self._parse_when_matched(), 5262 ) 5263 5264 def _parse_when_matched(self) -> t.List[exp.When]: 5265 whens = [] 5266 5267 while self._match(TokenType.WHEN): 5268 matched = not self._match(TokenType.NOT) 5269 self._match_text_seq("MATCHED") 5270 source = ( 5271 False 5272 if self._match_text_seq("BY", "TARGET") 5273 else self._match_text_seq("BY", "SOURCE") 5274 ) 5275 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5276 5277 self._match(TokenType.THEN) 5278 5279 if self._match(TokenType.INSERT): 5280 _this = self._parse_star() 5281 if _this: 5282 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5283 else: 5284 then = self.expression( 5285 exp.Insert, 5286 this=self._parse_value(), 5287 expression=self._match(TokenType.VALUES) and self._parse_value(), 5288 ) 5289 elif self._match(TokenType.UPDATE): 5290 expressions = self._parse_star() 5291 if expressions: 5292 then = self.expression(exp.Update, expressions=expressions) 5293 else: 5294 then = self.expression( 5295 exp.Update, 5296 expressions=self._match(TokenType.SET) 5297 and self._parse_csv(self._parse_equality), 5298 ) 5299 elif self._match(TokenType.DELETE): 5300 then = self.expression(exp.Var, this=self._prev.text) 5301 else: 5302 then = None 5303 5304 whens.append( 5305 self.expression( 5306 exp.When, 5307 matched=matched, 5308 source=source, 5309 condition=condition, 5310 
then=then, 5311 ) 5312 ) 5313 return whens 5314 5315 def _parse_show(self) -> t.Optional[exp.Expression]: 5316 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5317 if parser: 5318 return parser(self) 5319 return self._parse_as_command(self._prev) 5320 5321 def _parse_set_item_assignment( 5322 self, kind: t.Optional[str] = None 5323 ) -> t.Optional[exp.Expression]: 5324 index = self._index 5325 5326 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5327 return self._parse_set_transaction(global_=kind == "GLOBAL") 5328 5329 left = self._parse_primary() or self._parse_id_var() 5330 assignment_delimiter = self._match_texts(("=", "TO")) 5331 5332 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5333 self._retreat(index) 5334 return None 5335 5336 right = self._parse_statement() or self._parse_id_var() 5337 this = self.expression(exp.EQ, this=left, expression=right) 5338 5339 return self.expression(exp.SetItem, this=this, kind=kind) 5340 5341 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5342 self._match_text_seq("TRANSACTION") 5343 characteristics = self._parse_csv( 5344 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5345 ) 5346 return self.expression( 5347 exp.SetItem, 5348 expressions=characteristics, 5349 kind="TRANSACTION", 5350 **{"global": global_}, # type: ignore 5351 ) 5352 5353 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5354 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5355 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5356 5357 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5358 index = self._index 5359 set_ = self.expression( 5360 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5361 ) 5362 5363 if self._curr: 5364 self._retreat(index) 5365 return self._parse_as_command(self._prev) 5366 5367 return set_ 
    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Return the first option in `options` that matches upcoming tokens, as a Var, else None."""
        for option in options:
            # Each option may span several tokens, e.g. "ISOLATION LEVEL READ COMMITTED".
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL from `start` onward in a Command node."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the raw text into the leading keyword (length of the start token) and the rest.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dict property of the shape `this` (KIND(key value, ...)) into a DictProperty."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                # Stop once neither a key identifier nor a primary value can be parsed.
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a range clause `(MIN x MAX y)` or `(MAX y)`; MIN defaults to literal 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            # NOTE: `min`/`max` deliberately mirror the DictRange arg names (they shadow builtins).
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse a comprehension tail `<expr> IN <iterator> [IF <cond>]`; None if IN is absent."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Rewind one token further than our entry point — presumably the caller consumed a
            # token (e.g. FOR) before dispatching here; confirm against call sites.
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming token texts to find a parser in `parsers`.

        Advances while the accumulated words form a prefix in the trie; on a full
        match returns the corresponding callable, otherwise restores the token
        position and returns None.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        # NOTE(review): a PREFIX result on the last token would dereference a None
        # _curr on the next iteration — presumably the tries in use never end on a
        # strict prefix; confirm.
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Return True (consuming the token unless advance=False) if the current token is `token_type`."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            # Attach any pending comments to `expression`, if one was given.
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Like _match, but succeeds if the current token's type is in the collection `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Match two consecutive tokens of the given types, consuming both unless advance=False."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a '(' token, raising a parse error if absent."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ')' token, raising a parse error if absent."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Return True if the current token's uppercased text is in `texts` (case-insensitive match)."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of token texts in order; fully backtracks on any miss (and when advance=False)."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            # Peek-only mode: report the match but leave the position untouched.
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes as Dot chains (table.column) where a table is present."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            # Qualified columns become table.column Dots; unqualified ones collapse to their name.
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace Column references to lambda parameters in `node` with plain identifiers/dots."""
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # If the column sits inside a Dot chain, replace the outermost Dot;
                # the while/else runs only when the column is not inside any Dot.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VarMap from alternating key/value arguments.

    A single star argument short-circuits to a StarMap; otherwise the args are
    consumed pairwise as key, value, key, value, ...
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys: t.List[exp.Expression] = []
    values: t.List[exp.Expression] = []

    # Split the flat argument list into its key/value halves.
    for idx in range(0, len(args), 2):
        keys.append(args[idx])
        values.append(args[idx + 1])

    return exp.VarMap(keys=exp.Array(expressions=keys), values=exp.Array(expressions=values))
def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a Log/Ln expression from LOG(...) arguments, honoring dialect conventions.

    With one argument, returns Ln when the dialect defaults LOG to natural log,
    otherwise Log. With two arguments, the default order is (base, expression);
    dialects with LOG_BASE_FIRST unset take them in the opposite order.
    """
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if not expression:
        # Single-argument form: LOG(x) -> Ln(x) or Log(x) per dialect.
        return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)

    if not dialect.LOG_BASE_FIRST:
        this, expression = expression, this

    return exp.Log(this=this, expression=expression)
74class Parser(metaclass=_Parser): 75 """ 76 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 77 78 Args: 79 error_level: The desired error level. 80 Default: ErrorLevel.IMMEDIATE 81 error_message_context: Determines the amount of context to capture from a 82 query string when displaying the error message (in number of characters). 83 Default: 100 84 max_errors: Maximum number of error messages to include in a raised ParseError. 85 This is only relevant if error_level is ErrorLevel.RAISE. 86 Default: 3 87 """ 88 89 FUNCTIONS: t.Dict[str, t.Callable] = { 90 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 91 "CONCAT": lambda args, dialect: exp.Concat( 92 expressions=args, 93 safe=not dialect.STRICT_STRING_CONCAT, 94 coalesce=dialect.CONCAT_COALESCE, 95 ), 96 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 97 expressions=args, 98 safe=not dialect.STRICT_STRING_CONCAT, 99 coalesce=dialect.CONCAT_COALESCE, 100 ), 101 "DATE_TO_DATE_STR": lambda args: exp.Cast( 102 this=seq_get(args, 0), 103 to=exp.DataType(this=exp.DataType.Type.TEXT), 104 ), 105 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 106 "LIKE": parse_like, 107 "LOG": parse_logarithm, 108 "TIME_TO_TIME_STR": lambda args: exp.Cast( 109 this=seq_get(args, 0), 110 to=exp.DataType(this=exp.DataType.Type.TEXT), 111 ), 112 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 113 this=exp.Cast( 114 this=seq_get(args, 0), 115 to=exp.DataType(this=exp.DataType.Type.TEXT), 116 ), 117 start=exp.Literal.number(1), 118 length=exp.Literal.number(10), 119 ), 120 "VAR_MAP": parse_var_map, 121 } 122 123 NO_PAREN_FUNCTIONS = { 124 TokenType.CURRENT_DATE: exp.CurrentDate, 125 TokenType.CURRENT_DATETIME: exp.CurrentDate, 126 TokenType.CURRENT_TIME: exp.CurrentTime, 127 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 128 TokenType.CURRENT_USER: exp.CurrentUser, 129 } 130 131 STRUCT_TYPE_TOKENS = { 132 TokenType.NESTED, 
133 TokenType.STRUCT, 134 } 135 136 NESTED_TYPE_TOKENS = { 137 TokenType.ARRAY, 138 TokenType.LOWCARDINALITY, 139 TokenType.MAP, 140 TokenType.NULLABLE, 141 *STRUCT_TYPE_TOKENS, 142 } 143 144 ENUM_TYPE_TOKENS = { 145 TokenType.ENUM, 146 TokenType.ENUM8, 147 TokenType.ENUM16, 148 } 149 150 TYPE_TOKENS = { 151 TokenType.BIT, 152 TokenType.BOOLEAN, 153 TokenType.TINYINT, 154 TokenType.UTINYINT, 155 TokenType.SMALLINT, 156 TokenType.USMALLINT, 157 TokenType.INT, 158 TokenType.UINT, 159 TokenType.BIGINT, 160 TokenType.UBIGINT, 161 TokenType.INT128, 162 TokenType.UINT128, 163 TokenType.INT256, 164 TokenType.UINT256, 165 TokenType.MEDIUMINT, 166 TokenType.UMEDIUMINT, 167 TokenType.FIXEDSTRING, 168 TokenType.FLOAT, 169 TokenType.DOUBLE, 170 TokenType.CHAR, 171 TokenType.NCHAR, 172 TokenType.VARCHAR, 173 TokenType.NVARCHAR, 174 TokenType.TEXT, 175 TokenType.MEDIUMTEXT, 176 TokenType.LONGTEXT, 177 TokenType.MEDIUMBLOB, 178 TokenType.LONGBLOB, 179 TokenType.BINARY, 180 TokenType.VARBINARY, 181 TokenType.JSON, 182 TokenType.JSONB, 183 TokenType.INTERVAL, 184 TokenType.TINYBLOB, 185 TokenType.TINYTEXT, 186 TokenType.TIME, 187 TokenType.TIMETZ, 188 TokenType.TIMESTAMP, 189 TokenType.TIMESTAMP_S, 190 TokenType.TIMESTAMP_MS, 191 TokenType.TIMESTAMP_NS, 192 TokenType.TIMESTAMPTZ, 193 TokenType.TIMESTAMPLTZ, 194 TokenType.DATETIME, 195 TokenType.DATETIME64, 196 TokenType.DATE, 197 TokenType.INT4RANGE, 198 TokenType.INT4MULTIRANGE, 199 TokenType.INT8RANGE, 200 TokenType.INT8MULTIRANGE, 201 TokenType.NUMRANGE, 202 TokenType.NUMMULTIRANGE, 203 TokenType.TSRANGE, 204 TokenType.TSMULTIRANGE, 205 TokenType.TSTZRANGE, 206 TokenType.TSTZMULTIRANGE, 207 TokenType.DATERANGE, 208 TokenType.DATEMULTIRANGE, 209 TokenType.DECIMAL, 210 TokenType.UDECIMAL, 211 TokenType.BIGDECIMAL, 212 TokenType.UUID, 213 TokenType.GEOGRAPHY, 214 TokenType.GEOMETRY, 215 TokenType.HLLSKETCH, 216 TokenType.HSTORE, 217 TokenType.PSEUDO_TYPE, 218 TokenType.SUPER, 219 TokenType.SERIAL, 220 TokenType.SMALLSERIAL, 221 
TokenType.BIGSERIAL, 222 TokenType.XML, 223 TokenType.YEAR, 224 TokenType.UNIQUEIDENTIFIER, 225 TokenType.USERDEFINED, 226 TokenType.MONEY, 227 TokenType.SMALLMONEY, 228 TokenType.ROWVERSION, 229 TokenType.IMAGE, 230 TokenType.VARIANT, 231 TokenType.OBJECT, 232 TokenType.OBJECT_IDENTIFIER, 233 TokenType.INET, 234 TokenType.IPADDRESS, 235 TokenType.IPPREFIX, 236 TokenType.UNKNOWN, 237 TokenType.NULL, 238 *ENUM_TYPE_TOKENS, 239 *NESTED_TYPE_TOKENS, 240 } 241 242 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 243 TokenType.BIGINT: TokenType.UBIGINT, 244 TokenType.INT: TokenType.UINT, 245 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 246 TokenType.SMALLINT: TokenType.USMALLINT, 247 TokenType.TINYINT: TokenType.UTINYINT, 248 TokenType.DECIMAL: TokenType.UDECIMAL, 249 } 250 251 SUBQUERY_PREDICATES = { 252 TokenType.ANY: exp.Any, 253 TokenType.ALL: exp.All, 254 TokenType.EXISTS: exp.Exists, 255 TokenType.SOME: exp.Any, 256 } 257 258 RESERVED_TOKENS = { 259 *Tokenizer.SINGLE_TOKENS.values(), 260 TokenType.SELECT, 261 } 262 263 DB_CREATABLES = { 264 TokenType.DATABASE, 265 TokenType.SCHEMA, 266 TokenType.TABLE, 267 TokenType.VIEW, 268 TokenType.MODEL, 269 TokenType.DICTIONARY, 270 } 271 272 CREATABLES = { 273 TokenType.COLUMN, 274 TokenType.CONSTRAINT, 275 TokenType.FUNCTION, 276 TokenType.INDEX, 277 TokenType.PROCEDURE, 278 TokenType.FOREIGN_KEY, 279 *DB_CREATABLES, 280 } 281 282 # Tokens that can represent identifiers 283 ID_VAR_TOKENS = { 284 TokenType.VAR, 285 TokenType.ANTI, 286 TokenType.APPLY, 287 TokenType.ASC, 288 TokenType.AUTO_INCREMENT, 289 TokenType.BEGIN, 290 TokenType.CACHE, 291 TokenType.CASE, 292 TokenType.COLLATE, 293 TokenType.COMMAND, 294 TokenType.COMMENT, 295 TokenType.COMMIT, 296 TokenType.CONSTRAINT, 297 TokenType.DEFAULT, 298 TokenType.DELETE, 299 TokenType.DESC, 300 TokenType.DESCRIBE, 301 TokenType.DICTIONARY, 302 TokenType.DIV, 303 TokenType.END, 304 TokenType.EXECUTE, 305 TokenType.ESCAPE, 306 TokenType.FALSE, 307 TokenType.FIRST, 308 TokenType.FILTER, 309 
TokenType.FINAL, 310 TokenType.FORMAT, 311 TokenType.FULL, 312 TokenType.IS, 313 TokenType.ISNULL, 314 TokenType.INTERVAL, 315 TokenType.KEEP, 316 TokenType.KILL, 317 TokenType.LEFT, 318 TokenType.LOAD, 319 TokenType.MERGE, 320 TokenType.NATURAL, 321 TokenType.NEXT, 322 TokenType.OFFSET, 323 TokenType.OPERATOR, 324 TokenType.ORDINALITY, 325 TokenType.OVERLAPS, 326 TokenType.OVERWRITE, 327 TokenType.PARTITION, 328 TokenType.PERCENT, 329 TokenType.PIVOT, 330 TokenType.PRAGMA, 331 TokenType.RANGE, 332 TokenType.RECURSIVE, 333 TokenType.REFERENCES, 334 TokenType.REFRESH, 335 TokenType.REPLACE, 336 TokenType.RIGHT, 337 TokenType.ROW, 338 TokenType.ROWS, 339 TokenType.SEMI, 340 TokenType.SET, 341 TokenType.SETTINGS, 342 TokenType.SHOW, 343 TokenType.TEMPORARY, 344 TokenType.TOP, 345 TokenType.TRUE, 346 TokenType.UNIQUE, 347 TokenType.UNPIVOT, 348 TokenType.UPDATE, 349 TokenType.USE, 350 TokenType.VOLATILE, 351 TokenType.WINDOW, 352 *CREATABLES, 353 *SUBQUERY_PREDICATES, 354 *TYPE_TOKENS, 355 *NO_PAREN_FUNCTIONS, 356 } 357 358 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 359 360 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 361 TokenType.ANTI, 362 TokenType.APPLY, 363 TokenType.ASOF, 364 TokenType.FULL, 365 TokenType.LEFT, 366 TokenType.LOCK, 367 TokenType.NATURAL, 368 TokenType.OFFSET, 369 TokenType.RIGHT, 370 TokenType.SEMI, 371 TokenType.WINDOW, 372 } 373 374 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 375 376 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 377 378 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 379 380 FUNC_TOKENS = { 381 TokenType.COLLATE, 382 TokenType.COMMAND, 383 TokenType.CURRENT_DATE, 384 TokenType.CURRENT_DATETIME, 385 TokenType.CURRENT_TIMESTAMP, 386 TokenType.CURRENT_TIME, 387 TokenType.CURRENT_USER, 388 TokenType.FILTER, 389 TokenType.FIRST, 390 TokenType.FORMAT, 391 TokenType.GLOB, 392 TokenType.IDENTIFIER, 393 TokenType.INDEX, 394 TokenType.ISNULL, 395 TokenType.ILIKE, 396 TokenType.INSERT, 397 TokenType.LIKE, 398 
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary operator precedence tables: each maps the operator's token to the
    # exp node it produces. Dialects override these to add/remove operators.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Empty by default; dialects with an exponentiation operator populate this.
    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Empty by default; dialects that support join hints (strings) populate this.
    JOIN_HINTS: t.Set[str] = set()

    # Lambda-introducing tokens -> parser callables. `expressions` holds the
    # already-parsed lambda parameters; the callable parses the body.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that can follow a column expression (casts, JSON
    # extraction arrows, etc). DOT is special-cased (None) by the caller.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Maps a target Expression type to the method that can parse it; used by
    # `parse_into` to parse a token stream directly into a given node type.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Statement-leading keyword token -> parser method dispatch.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/primary token -> parser; each callable also receives the token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            # "except" is a Python keyword, hence the dict-splat for kwargs
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        # ":name" / ":1" style placeholders: only a placeholder when followed
        # by a number or identifier-like token, otherwise not matched.
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    # Range/predicate operators that follow a parsed expression (`this`).
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # DDL property keyword (upper-cased text) -> parser method dispatch.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keyword -> parser method dispatch.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # ALTER TABLE action keyword -> parser method dispatch.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema without an explicit name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    # Function-like keywords that are parsed without parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument lists need custom (non-CSV) parsing.
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Query-modifier token -> (modifier arg name, parser) pairs.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialects that support SHOW statements populate this.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # Whether "::" casts produce exp.Cast (strict) or exp.TryCast
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ) -> None:
        """
        Args:
            error_level: How parse errors are handled (defaults to IMMEDIATE, i.e. raise).
            error_message_context: Number of characters of SQL context shown in error messages.
            max_errors: Maximum number of error messages concatenated into a raised ParseError.
            dialect: The dialect (name, class or instance) this parser targets.
        """
        # Imported here rather than at module level to avoid a circular import
        # with sqlglot.dialects.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self) -> None:
        """Clears all parsing state so the instance can be reused for a new token stream."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # Every candidate type failed; surface all collected errors, chained to
        # the last failure for a useful traceback.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Splits the token stream into semicolon-delimited chunks and applies
        `parse_method` to each chunk, returning one expression per chunk.
        """
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # Only a semicolon that isn't the very last token starts a new chunk
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # _advance() moves from -1 onto the first token of the chunk
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any leftover tokens mean the chunk wasn't fully consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Explicit comments win; otherwise attach (and consume) any pending
        # comments from the previously advanced-over token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Attaches the pending `_prev_comments` (if any) to `expression` and clears them."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL text spanning `start` through `end` (inclusive)."""
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        """Whether the previous and current tokens are adjacent in the SQL text (no whitespace between)."""
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        """Moves the token cursor forward by `times` and refreshes _curr/_next/_prev(_comments)."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the token cursor back (or forward) to the absolute position `index`."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Wraps the previous token's keyword and the rest of the statement in a generic Command node."""
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )
1201 1202 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1203 start = self._prev 1204 exists = self._parse_exists() if allow_exists else None 1205 1206 self._match(TokenType.ON) 1207 1208 kind = self._match_set(self.CREATABLES) and self._prev 1209 if not kind: 1210 return self._parse_as_command(start) 1211 1212 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1213 this = self._parse_user_defined_function(kind=kind.token_type) 1214 elif kind.token_type == TokenType.TABLE: 1215 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1216 elif kind.token_type == TokenType.COLUMN: 1217 this = self._parse_column() 1218 else: 1219 this = self._parse_id_var() 1220 1221 self._match(TokenType.IS) 1222 1223 return self.expression( 1224 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1225 ) 1226 1227 def _parse_to_table( 1228 self, 1229 ) -> exp.ToTableProperty: 1230 table = self._parse_table_parts(schema=True) 1231 return self.expression(exp.ToTableProperty, this=table) 1232 1233 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1234 def _parse_ttl(self) -> exp.Expression: 1235 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1236 this = self._parse_bitwise() 1237 1238 if self._match_text_seq("DELETE"): 1239 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1240 if self._match_text_seq("RECOMPRESS"): 1241 return self.expression( 1242 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1243 ) 1244 if self._match_text_seq("TO", "DISK"): 1245 return self.expression( 1246 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1247 ) 1248 if self._match_text_seq("TO", "VOLUME"): 1249 return self.expression( 1250 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1251 ) 1252 1253 return this 1254 1255 expressions = self._parse_csv(_parse_ttl_action) 1256 where = 
self._parse_where() 1257 group = self._parse_group() 1258 1259 aggregates = None 1260 if group and self._match(TokenType.SET): 1261 aggregates = self._parse_csv(self._parse_set_item) 1262 1263 return self.expression( 1264 exp.MergeTreeTTL, 1265 expressions=expressions, 1266 where=where, 1267 group=group, 1268 aggregates=aggregates, 1269 ) 1270 1271 def _parse_statement(self) -> t.Optional[exp.Expression]: 1272 if self._curr is None: 1273 return None 1274 1275 if self._match_set(self.STATEMENT_PARSERS): 1276 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1277 1278 if self._match_set(Tokenizer.COMMANDS): 1279 return self._parse_command() 1280 1281 expression = self._parse_expression() 1282 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1283 return self._parse_query_modifiers(expression) 1284 1285 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1286 start = self._prev 1287 temporary = self._match(TokenType.TEMPORARY) 1288 materialized = self._match_text_seq("MATERIALIZED") 1289 1290 kind = self._match_set(self.CREATABLES) and self._prev.text 1291 if not kind: 1292 return self._parse_as_command(start) 1293 1294 return self.expression( 1295 exp.Drop, 1296 comments=start.comments, 1297 exists=exists or self._parse_exists(), 1298 this=self._parse_table(schema=True), 1299 kind=kind, 1300 temporary=temporary, 1301 materialized=materialized, 1302 cascade=self._match_text_seq("CASCADE"), 1303 constraints=self._match_text_seq("CONSTRAINTS"), 1304 purge=self._match_text_seq("PURGE"), 1305 ) 1306 1307 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1308 return ( 1309 self._match_text_seq("IF") 1310 and (not not_ or self._match(TokenType.NOT)) 1311 and self._match(TokenType.EXISTS) 1312 ) 1313 1314 def _parse_create(self) -> exp.Create | exp.Command: 1315 # Note: this can't be None because we've matched a statement parser 1316 start = self._prev 1317 comments = self._prev_comments 
1318 1319 replace = start.text.upper() == "REPLACE" or self._match_pair( 1320 TokenType.OR, TokenType.REPLACE 1321 ) 1322 unique = self._match(TokenType.UNIQUE) 1323 1324 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1325 self._advance() 1326 1327 properties = None 1328 create_token = self._match_set(self.CREATABLES) and self._prev 1329 1330 if not create_token: 1331 # exp.Properties.Location.POST_CREATE 1332 properties = self._parse_properties() 1333 create_token = self._match_set(self.CREATABLES) and self._prev 1334 1335 if not properties or not create_token: 1336 return self._parse_as_command(start) 1337 1338 exists = self._parse_exists(not_=True) 1339 this = None 1340 expression: t.Optional[exp.Expression] = None 1341 indexes = None 1342 no_schema_binding = None 1343 begin = None 1344 end = None 1345 clone = None 1346 1347 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1348 nonlocal properties 1349 if properties and temp_props: 1350 properties.expressions.extend(temp_props.expressions) 1351 elif temp_props: 1352 properties = temp_props 1353 1354 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1355 this = self._parse_user_defined_function(kind=create_token.token_type) 1356 1357 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1358 extend_props(self._parse_properties()) 1359 1360 self._match(TokenType.ALIAS) 1361 1362 if self._match(TokenType.COMMAND): 1363 expression = self._parse_as_command(self._prev) 1364 else: 1365 begin = self._match(TokenType.BEGIN) 1366 return_ = self._match_text_seq("RETURN") 1367 1368 if self._match(TokenType.STRING, advance=False): 1369 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1370 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1371 expression = self._parse_string() 1372 extend_props(self._parse_properties()) 1373 else: 
1374 expression = self._parse_statement() 1375 1376 end = self._match_text_seq("END") 1377 1378 if return_: 1379 expression = self.expression(exp.Return, this=expression) 1380 elif create_token.token_type == TokenType.INDEX: 1381 this = self._parse_index(index=self._parse_id_var()) 1382 elif create_token.token_type in self.DB_CREATABLES: 1383 table_parts = self._parse_table_parts(schema=True) 1384 1385 # exp.Properties.Location.POST_NAME 1386 self._match(TokenType.COMMA) 1387 extend_props(self._parse_properties(before=True)) 1388 1389 this = self._parse_schema(this=table_parts) 1390 1391 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1392 extend_props(self._parse_properties()) 1393 1394 self._match(TokenType.ALIAS) 1395 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1396 # exp.Properties.Location.POST_ALIAS 1397 extend_props(self._parse_properties()) 1398 1399 expression = self._parse_ddl_select() 1400 1401 if create_token.token_type == TokenType.TABLE: 1402 # exp.Properties.Location.POST_EXPRESSION 1403 extend_props(self._parse_properties()) 1404 1405 indexes = [] 1406 while True: 1407 index = self._parse_index() 1408 1409 # exp.Properties.Location.POST_INDEX 1410 extend_props(self._parse_properties()) 1411 1412 if not index: 1413 break 1414 else: 1415 self._match(TokenType.COMMA) 1416 indexes.append(index) 1417 elif create_token.token_type == TokenType.VIEW: 1418 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1419 no_schema_binding = True 1420 1421 shallow = self._match_text_seq("SHALLOW") 1422 1423 if self._match_texts(self.CLONE_KEYWORDS): 1424 copy = self._prev.text.lower() == "copy" 1425 clone = self.expression( 1426 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1427 ) 1428 1429 return self.expression( 1430 exp.Create, 1431 comments=comments, 1432 this=this, 1433 kind=create_token.text.upper(), 1434 replace=replace, 1435 unique=unique, 1436 expression=expression, 1437 exists=exists, 1438 
            # (continuation of _parse_create, started above this chunk) —
            # remaining keyword arguments of the exp.Create node.
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse a property that appears *before* the object name in a DDL
        statement, dispatching through PROPERTY_PARSERS.

        Returns the parsed property expression, or None when no registered
        property keyword follows.
        """
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect optional leading modifier keywords; each _match_* consumes
        # the token only when it is present, so order here mirrors the grammar.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched; a parser
                # that doesn't accept a given keyword raises TypeError.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/DDL property, trying (in order): a registered
        property parser, DEFAULT-prefixed parsers, COMPOUND SORTKEY,
        SQL SECURITY, and finally a generic `key = value` assignment.

        Returns None (with the token cursor restored) when nothing matches.
        """
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` fallback; remember the cursor so we can back
        # out cleanly if there is no `=` after the key.
        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse a STORED AS clause (Hive-style file format), including the
        INPUTFORMAT/OUTPUTFORMAT variant."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parse `[= | AS] <field>` and wrap it in *exp_class*."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties into an exp.Properties node.

        When *before* is truthy the Teradata pre-name variant is used.
        Returns None if no property was parsed at all.
        """
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield a list of properties.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parse a (NO) FALLBACK [PROTECTION] property."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: after a CREATE-like token it is a table
        property, otherwise it is a function stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        """Parse SYSTEM_VERSIONING [= ON] [(HISTORY_TABLE = ...,
        DATA_CONSISTENCY_CHECK = ...)] (T-SQL temporal tables)."""
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop
    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the clause following WITH in DDL: either a parenthesized
        property list, or one of the Teradata WITH variants (JOURNAL,
        [NO] DATA, isolated loading)."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse DEFINER = user@host; returns None if either part is
        missing so the caller can fall back to other parsers."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse WITH JOURNAL [TABLE] [=] <table> (Teradata)."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parse a (NO) LOG property."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Wrap pre-matched journal modifiers into a JournalProperty."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM [=] ON|OFF|DEFAULT."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        """Parse CLUSTER BY <ordered expr, ...>."""
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse Hive CLUSTERED BY (cols) [SORTED BY (...)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; retreats one token and returns None when the
        COPY that was matched upstream is not followed by GRANTS."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE [=] <number> [PERCENT] (Teradata)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO, either with an explicit `= n [PERCENT]`
        value or as the bare NO/DEFAULT form."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE [=] <n> [BYTES|KBYTES|KILOBYTES] (Teradata);
        the default/minimum/maximum modifiers were matched by the caller."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION [=] ALWAYS|MANUAL|NEVER|DEFAULT
        [AUTOTEMP (...)] (Teradata)."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING
        [FOR ALL|INSERT|NONE] (Teradata)."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: object kind, optional target, FOR/IN,
        lock type and OVERRIDE (Teradata)."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locking has no named target; the others name the locked object.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse PARTITION BY <expr, ...>; empty list when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a PostgreSQL partition bound: IN (...), FROM (...) TO (...),
        or WITH (MODULUS n, REMAINDER m). Raises on anything else."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords only inside bound tuples.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <parent> {DEFAULT | FOR VALUES <bound spec>};
        returns None (after retreating) if OF does not follow."""
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY [=] <schema or bracketed field>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS] (Teradata)."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """Parse the NO PRIMARY INDEX property; None when it does not match."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None
    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows ON: COMMIT PRESERVE/DELETE ROWS for temp
        tables, otherwise a generic ON <schema> property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY (<id>) (Redshift)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option> ...] in CREATE
        TABLE; returns None if an option keyword lacks its identifier."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY (<ids>) (Redshift)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <name>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <table parts> (BigQuery models)."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause for a UDF: a scalar type, TABLE<...>, or
        TABLE (<schema>)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> — angle-bracketed struct-style schema.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<kind>] <table> [<properties>]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement, including INSERT OVERWRITE,
        INSERT ... DIRECTORY, OR <alternative>, partitions, ON CONFLICT and
        RETURNING clauses."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' ...
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )
    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION|QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... DO NOTHING/UPDATE (PostgreSQL) or
        ON DUPLICATE KEY UPDATE (MySQL); None when neither follows."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]; None when absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the FORMAT part of a ROW FORMAT clause (ROW already
        consumed); None when FORMAT does not follow."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive ROW FORMAT clause: SERDE '<class>' [WITH
        SERDEPROPERTIES (...)] or DELIMITED with its field/line options."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ... (Hive); any other LOAD is
        kept verbatim as a Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        """Parse a DELETE statement, including MySQL multi-table deletes
        and USING/RETURNING clauses."""
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse UPDATE <table> SET ... [FROM ...] [WHERE ...]
        [RETURNING ...] [ORDER BY ...] [LIMIT ...]."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table> (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')]
        [AS <select>] (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )
    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<exprs>); None when PARTITION is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse a single VALUES row: either a parenthesized tuple or a
        bare expression (Presto allows VALUES 1, 2)."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH-prefixed statements, plain
        SELECTs, parenthesized subqueries, VALUES lists and DuckDB's
        leading FROM, then attach set operations unless disabled.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # BigQuery's SELECT AS STRUCT / AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            # Bare leading FROM (DuckDB): implicit SELECT *.
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTEs; None when WITH
        is absent (unless *skip_with_token* says it was already consumed)."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: `<alias> AS (<statement>)`; the alias is
        mandatory."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse `[AS] <alias> [(col, ...)]`; None when neither an alias nor
        a column list is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Retreat when the parenthesized list turned out to be empty.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap *this* in a Subquery with optional pivots and alias; None
        passes through unchanged."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )
alias=self._parse_table_alias() if parse_alias else None, 2352 ) 2353 2354 def _parse_query_modifiers( 2355 self, this: t.Optional[exp.Expression] 2356 ) -> t.Optional[exp.Expression]: 2357 if isinstance(this, self.MODIFIABLES): 2358 for join in iter(self._parse_join, None): 2359 this.append("joins", join) 2360 for lateral in iter(self._parse_lateral, None): 2361 this.append("laterals", lateral) 2362 2363 while True: 2364 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2365 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2366 key, expression = parser(self) 2367 2368 if expression: 2369 this.set(key, expression) 2370 if key == "limit": 2371 offset = expression.args.pop("offset", None) 2372 if offset: 2373 this.set("offset", exp.Offset(expression=offset)) 2374 continue 2375 break 2376 return this 2377 2378 def _parse_hint(self) -> t.Optional[exp.Hint]: 2379 if self._match(TokenType.HINT): 2380 hints = [] 2381 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2382 hints.extend(hint) 2383 2384 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2385 self.raise_error("Expected */ after HINT") 2386 2387 return self.expression(exp.Hint, expressions=hints) 2388 2389 return None 2390 2391 def _parse_into(self) -> t.Optional[exp.Into]: 2392 if not self._match(TokenType.INTO): 2393 return None 2394 2395 temp = self._match(TokenType.TEMPORARY) 2396 unlogged = self._match_text_seq("UNLOGGED") 2397 self._match(TokenType.TABLE) 2398 2399 return self.expression( 2400 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2401 ) 2402 2403 def _parse_from( 2404 self, joins: bool = False, skip_from_token: bool = False 2405 ) -> t.Optional[exp.From]: 2406 if not skip_from_token and not self._match(TokenType.FROM): 2407 return None 2408 2409 return self.expression( 2410 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2411 ) 2412 2413 def _parse_match_recognize(self) -> 
t.Optional[exp.MatchRecognize]: 2414 if not self._match(TokenType.MATCH_RECOGNIZE): 2415 return None 2416 2417 self._match_l_paren() 2418 2419 partition = self._parse_partition_by() 2420 order = self._parse_order() 2421 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2422 2423 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2424 rows = exp.var("ONE ROW PER MATCH") 2425 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2426 text = "ALL ROWS PER MATCH" 2427 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2428 text += f" SHOW EMPTY MATCHES" 2429 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2430 text += f" OMIT EMPTY MATCHES" 2431 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2432 text += f" WITH UNMATCHED ROWS" 2433 rows = exp.var(text) 2434 else: 2435 rows = None 2436 2437 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2438 text = "AFTER MATCH SKIP" 2439 if self._match_text_seq("PAST", "LAST", "ROW"): 2440 text += f" PAST LAST ROW" 2441 elif self._match_text_seq("TO", "NEXT", "ROW"): 2442 text += f" TO NEXT ROW" 2443 elif self._match_text_seq("TO", "FIRST"): 2444 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2445 elif self._match_text_seq("TO", "LAST"): 2446 text += f" TO LAST {self._advance_any().text}" # type: ignore 2447 after = exp.var(text) 2448 else: 2449 after = None 2450 2451 if self._match_text_seq("PATTERN"): 2452 self._match_l_paren() 2453 2454 if not self._curr: 2455 self.raise_error("Expecting )", self._curr) 2456 2457 paren = 1 2458 start = self._curr 2459 2460 while self._curr and paren > 0: 2461 if self._curr.token_type == TokenType.L_PAREN: 2462 paren += 1 2463 if self._curr.token_type == TokenType.R_PAREN: 2464 paren -= 1 2465 2466 end = self._prev 2467 self._advance() 2468 2469 if paren > 0: 2470 self.raise_error("Expecting )", self._curr) 2471 2472 pattern = exp.var(self._find_sql(start, end)) 2473 else: 2474 pattern = None 2475 2476 define = ( 2477 
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL [VIEW] [OUTER] ..., CROSS APPLY or OUTER APPLY."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            # cross_apply is a tri-state: True = CROSS APPLY, False = OUTER
            # APPLY, None = plain LATERAL (or no match at all).
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: fall back to UNNEST, a function call, or a
            # (possibly dotted) identifier.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume the optional join METHOD, SIDE and KIND tokens, in that order,
        returning the matched token (or None) for each slot."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join clause (including comma joins and APPLY forms), or None."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword followed the modifiers: rewind and forget them.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Support nested joins where the ON/USING belongs to the outer
            # join, e.g. "a JOIN b JOIN c ON ... ON ...".
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by a Postgres operator class."""
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; `index` is pre-parsed when the name came first."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) or MySQL USE/FORCE/IGNORE INDEX table hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single dotted component of a table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse [catalog.][db.]table (with arbitrarily nested dots) into `exp.Table`."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema) or ""

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a FROM-style table factor: lateral, unnest, VALUES, subquery or
        plain table reference, with alias, hints, pivots and sampling."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects put the TABLESAMPLE clause after the alias; exactly one
        # of these two branches runs, so table_sample is always bound.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal/versioned table qualifier (FOR SYSTEM_TIME-style)."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
2799 else: 2800 return None 2801 2802 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2803 kind = self._prev.text.upper() 2804 start = self._parse_bitwise() 2805 self._match_texts(("TO", "AND")) 2806 end = self._parse_bitwise() 2807 expression: t.Optional[exp.Expression] = self.expression( 2808 exp.Tuple, expressions=[start, end] 2809 ) 2810 elif self._match_text_seq("CONTAINED", "IN"): 2811 kind = "CONTAINED IN" 2812 expression = self.expression( 2813 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2814 ) 2815 elif self._match(TokenType.ALL): 2816 kind = "ALL" 2817 expression = None 2818 else: 2819 self._match_text_seq("AS", "OF") 2820 kind = "AS OF" 2821 expression = self._parse_type() 2822 2823 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2824 2825 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2826 if not self._match(TokenType.UNNEST): 2827 return None 2828 2829 expressions = self._parse_wrapped_csv(self._parse_equality) 2830 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2831 2832 alias = self._parse_table_alias() if with_alias else None 2833 2834 if alias: 2835 if self.dialect.UNNEST_COLUMN_ONLY: 2836 if alias.args.get("columns"): 2837 self.raise_error("Unexpected extra column alias in unnest.") 2838 2839 alias.set("columns", [alias.this]) 2840 alias.set("this", None) 2841 2842 columns = alias.args.get("columns") or [] 2843 if offset and len(expressions) < len(columns): 2844 offset = columns.pop() 2845 2846 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2847 self._match(TokenType.ALIAS) 2848 offset = self._parse_id_var( 2849 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2850 ) or exp.to_identifier("offset") 2851 2852 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2853 2854 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2855 is_derived = 
self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2856 if not is_derived and not self._match(TokenType.VALUES): 2857 return None 2858 2859 expressions = self._parse_csv(self._parse_value) 2860 alias = self._parse_table_alias() 2861 2862 if is_derived: 2863 self._match_r_paren() 2864 2865 return self.expression( 2866 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2867 ) 2868 2869 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2870 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2871 as_modifier and self._match_text_seq("USING", "SAMPLE") 2872 ): 2873 return None 2874 2875 bucket_numerator = None 2876 bucket_denominator = None 2877 bucket_field = None 2878 percent = None 2879 size = None 2880 seed = None 2881 2882 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 2883 matched_l_paren = self._match(TokenType.L_PAREN) 2884 2885 if self.TABLESAMPLE_CSV: 2886 num = None 2887 expressions = self._parse_csv(self._parse_primary) 2888 else: 2889 expressions = None 2890 num = ( 2891 self._parse_factor() 2892 if self._match(TokenType.NUMBER, advance=False) 2893 else self._parse_primary() or self._parse_placeholder() 2894 ) 2895 2896 if self._match_text_seq("BUCKET"): 2897 bucket_numerator = self._parse_number() 2898 self._match_text_seq("OUT", "OF") 2899 bucket_denominator = bucket_denominator = self._parse_number() 2900 self._match(TokenType.ON) 2901 bucket_field = self._parse_field() 2902 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2903 percent = num 2904 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 2905 size = num 2906 else: 2907 percent = num 2908 2909 if matched_l_paren: 2910 self._match_r_paren() 2911 2912 if self._match(TokenType.L_PAREN): 2913 method = self._parse_var(upper=True) 2914 seed = self._match(TokenType.COMMA) and self._parse_number() 2915 self._match_r_paren() 2916 elif self._match_texts(("SEED", "REPEATABLE")): 2917 
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Parse consecutive PIVOT/UNPIVOT clauses, or None if there are none."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        """Parse consecutive join clauses, or None if there are none."""
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT clause, rewinding and returning None on a non-match."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the names of the columns a PIVOT produces, combining
            # each IN value with each aggregation alias.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause into `exp.Where`, or return None if absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with expressions, GROUPING SETS, ROLLUP, CUBE and TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            # WITH ROLLUP / WITH CUBE take no column list; the wrapped CSV
            # form (ROLLUP (a, b)) does.
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # The WITH belonged to something else (e.g. a lock clause).
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS ( ... ), or return None if absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause into `exp.Having`, or return None if absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause into `exp.Qualify`, or return None if absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY ... (in either order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # PRIOR is only a prefix operator inside CONNECT BY, so register its
        # parser just for this expression and remove it immediately after.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `<name> AS <expr>` (alias first) into an `exp.Alias`."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse-style INTERPOLATE ( ... ), or return None."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause, or return `this` unchanged if absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause (SORT BY, CLUSTER BY, ...) into `exp_class`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        """Parse one ordering term: expr [ASC|DESC] [NULLS FIRST|LAST] [WITH FILL]."""
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        # Consume DESC; when ASC matched instead, force desc to False (not
        # None) so the direction is recorded as explicitly ascending.
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Without an explicit NULLS clause, derive the null ordering from the
        # dialect's default so transpilation stays semantically faithful.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or a FETCH clause, or return `this` unchanged."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL LIMIT <offset>, <count>
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause, or return `this` unchanged if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse any number of FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE clauses."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT clauses onto `this`, left to right."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            # Move trailing modifiers (e.g. ORDER BY, LIMIT) from the last
            # operand up to the union itself.
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS, ISNULL/NOTNULL, ...)."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate (NOT, DISTINCT FROM, NULL/boolean)."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not actually an IS predicate: rewind to before the IS token.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the tail of an IN predicate: UNNEST(...), a (sub)query/tuple, or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse the tail of a BETWEEN predicate: <this> BETWEEN <low> AND <high>."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an Escape node if an ESCAPE '<char>' clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, canonicalizing to INTERVAL '<n>' <unit> form."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bare `interval IS ...` — "interval" was a column name, not the keyword; rewind.
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True, upper=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                # Split INTERVAL '5 day' into value '5' and unit DAY.
                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse a chain of bitwise / concat operators at this precedence level."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                # `||` as string concatenation (dialect-dependent).
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # `??` — null-coalescing operator.
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse +/- level operators over factors."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse * / % level operators; tag divisions with dialect division semantics."""
        if self.EXPONENT:
            factor = self._parse_tokens(self._parse_exponent, self.FACTOR)
        else:
            factor = self._parse_tokens(self._parse_unary, self.FACTOR)
        if isinstance(factor, exp.Div):
            factor.args["typed"] = self.dialect.TYPED_DIVISION
            factor.args["safe"] = self.dialect.SAFE_DIVISION
        return factor

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation-level operators over unary expressions."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator, else fall through to a typed expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast-style `<type> <literal>`, or a plain column expression."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' — let a type-specific literal parser claim it.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by something else — it was a column after all.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parenthesized type parameter, e.g. the 10 in DECIMAL(10, 2)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type into an exp.DataType.

        check_func: when True, retreat if the "type" is actually a function call.
        schema: propagated into nested type parsing (column-def context).
        allow_identifiers: allow plain identifiers to be re-tokenized as types / UDTs.
        """
        index = self._index

        # Teradata allows a SYSUDTLIB. prefix on type names.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                # Re-tokenize the identifier: it may be a type keyword in disguise.
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # Consume a dotted UDT name, e.g. schema.my_type.
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a type after all (e.g. a function call) — rewind fully.
                self._retreat(index)
                return None

            # TYPE(...) is syntactically ambiguous with a function call; decide later.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket nesting, e.g. ARRAY<INT>, STRUCT<a INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # e.g. INTERVAL YEAR TO MONTH.
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal — treat TYPE(...) as a function call; rewind.
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                # e.g. MySQL INT UNSIGNED — map to the unsigned counterpart token.
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs build array types, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field, e.g. `name: TYPE` or `name TYPE`."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AtTimeZone if an AT TIME ZONE clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference (and any trailing column operators / brackets)."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators: `::type` casts, dots, JSON extraction, brackets."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers left: a.b.c — previous parts become table/db/catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, implicit string concat, or parenthesized expr."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal, e.g. `.5` -> 0.5.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: primary literal, function call, or identifier/variable."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, including the ODBC `{fn <function>}` escape form."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly known/dialect-specific) function call, then any OVER window.

        functions: override for the name -> builder mapping (defaults to self.FUNCTIONS).
        anonymous: when True, always build exp.Anonymous instead of known Func classes.
        optional_parens: allow no-paren functions like CURRENT_DATE.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...).
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original spelling of the function name.
                    func.meta["name"] = this

                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function signature."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name and optional parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); fall back to a bare identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> ...`), else a DISTINCT list or ordinary expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow — rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list), unless a SELECT follows."""
        index = self._index

        if not self.errors:
            # Speculatively try a nested SELECT first; restore state either way.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a column definition whose name may be any token."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and trailing constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Computed column, e.g. `col AS (expr) [PERSISTED] [NOT NULL]`.
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; rewind if REFRESH does not follow AUTO."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY(...) | ROW ... | (expr)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # System-versioned temporal table: AS ROW {START | END} [HIDDEN].
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (expr) — the parens held an expression, not options.
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric shorthand: IDENTITY(start, increment).
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <value>."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the tail of a NOT ... column constraint (NULL / CASESPECIFIC / FOR REPLICATION)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed ones delegate to the schema-level parsers."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the parser registered for the next constraint keyword, if any."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(columns)] [USING <index type>]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON DELETE/UPDATE actions, etc.) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON is the event (e.g. DELETE or UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; `match=False` assumes REFERENCES was already consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint:
        """Parse PERIOD FOR SYSTEM_TIME (start_col, end_col)."""
        self._match(TokenType.TIMESTAMP_SNAPSHOT)

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as a column constraint or, with a column list, a table constraint."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one bracketed element: an expression with optional alias and slice."""
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing `[...]` (index/array) or `{...}` (struct) suffixes, recursively."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscript indices to the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)
4304 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4305 if self._match(TokenType.COLON): 4306 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4307 return this 4308 4309 def _parse_case(self) -> t.Optional[exp.Expression]: 4310 ifs = [] 4311 default = None 4312 4313 comments = self._prev_comments 4314 expression = self._parse_conjunction() 4315 4316 while self._match(TokenType.WHEN): 4317 this = self._parse_conjunction() 4318 self._match(TokenType.THEN) 4319 then = self._parse_conjunction() 4320 ifs.append(self.expression(exp.If, this=this, true=then)) 4321 4322 if self._match(TokenType.ELSE): 4323 default = self._parse_conjunction() 4324 4325 if not self._match(TokenType.END): 4326 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4327 default = exp.column("interval") 4328 else: 4329 self.raise_error("Expected END after CASE", self._prev) 4330 4331 return self._parse_window( 4332 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4333 ) 4334 4335 def _parse_if(self) -> t.Optional[exp.Expression]: 4336 if self._match(TokenType.L_PAREN): 4337 args = self._parse_csv(self._parse_conjunction) 4338 this = self.validate_expression(exp.If.from_arg_list(args), args) 4339 self._match_r_paren() 4340 else: 4341 index = self._index - 1 4342 condition = self._parse_conjunction() 4343 4344 if not condition: 4345 self._retreat(index) 4346 return None 4347 4348 self._match(TokenType.THEN) 4349 true = self._parse_conjunction() 4350 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4351 self._match(TokenType.END) 4352 this = self.expression(exp.If, this=condition, true=true, false=false) 4353 4354 return self._parse_window(this) 4355 4356 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4357 if not self._match_text_seq("VALUE", "FOR"): 4358 self._retreat(self._index - 1) 4359 return None 4360 4361 return 
self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse the body of EXTRACT(part FROM expr) or EXTRACT(part, expr)."""
        # The "part" may be a function call, a plain var, or a type name (e.g. YEAR).
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING MAX|MIN column]) into exp.AnyValue."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            # NOTE(review): if neither MAX nor MIN follows HAVING, _prev is still the
            # HAVING token here, so is_max silently becomes False — confirm intended.
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the body of CAST(expr AS type [FORMAT fmt]).

        `strict` selects exp.Cast vs exp.TryCast; `safe` is forwarded to the node.
        A comma instead of AS produces exp.CastToStrType; a FORMAT clause on a
        temporal target type is rewritten into StrToDate / StrToTime.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Casting a string with an explicit format to a temporal type is
                # represented as a string-to-date/time conversion instead of a cast.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # An identifier in type position is treated as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style arguments into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast.

        `strict` selects exp.Cast vs exp.TryCast; `safe` is forwarded to the node.
        """
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # First variant: fewer than 3 args means DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL search values must be matched with IS NULL, not equality.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: it may evaluate to NULL at runtime, so match
                # either by equality or by both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one [KEY] k {:|,} [VALUE] v entry of a JSON object constructor."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.FormatJson when followed by FORMAT JSON; otherwise pass through."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e.
        # NULL ON NULL (Oracle, T-SQL)
        # Returns the matched "VALUE ON <on>" string, or None when nothing matches.
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse the body of JSON_OBJECT(...), including NULL/UNIQUE KEYS/RETURNING/ENCODING clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # Tri-state: True (WITH UNIQUE), False (WITHOUT UNIQUE), None (unspecified).
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition of a JSON_TABLE COLUMNS(...) clause."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS(...) schema for JSON_TABLE / NESTED PATH."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse the body of JSON_TABLE(expr [, path] [error/empty handling] COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL-style MATCH (cols) AGAINST (expr [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (...) clause: name, type, optional path, AS JSON flag.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments into exp.StrPosition.

        `haystack_first` controls argument order in the comma form; the
        `needle IN haystack` form is handled uniformly.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse ML.PREDICT-style arguments: MODEL t [, TABLE t [, params]]."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a comma-separated table list as the arguments of a join hint."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            # LEADING / TRAILING / BOTH, normalized to upper case.
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # In the FROM form (and when TRIM_PATTERN_FIRST is set), the first
            # expression is the trim pattern and the second is the target string.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse WINDOW w AS (...), ... at the end of a SELECT, if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse a single named window definition (name AS (spec))."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the corresponding keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes on `this`: FILTER, WITHIN GROUP, nulls
        handling, and the OVER (...) clause with its partition/order/frame spec.

        With `alias=True`, parses a named window definition (name AS (spec))
        instead of an OVER clause.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
4773 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4774 if self._match_text_seq("WITHIN", "GROUP"): 4775 order = self._parse_wrapped(self._parse_order) 4776 this = self.expression(exp.WithinGroup, this=this, expression=order) 4777 4778 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4779 # Some dialects choose to implement and some do not. 4780 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4781 4782 # There is some code above in _parse_lambda that handles 4783 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4784 4785 # The below changes handle 4786 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4787 4788 # Oracle allows both formats 4789 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4790 # and Snowflake chose to do the same for familiarity 4791 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4792 this = self._parse_respect_or_ignore_nulls(this) 4793 4794 # bigquery select from window x AS (partition by ...) 
        if alias:
            # Named window definition: no OVER keyword, just "name AS (spec)".
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER w — a reference to a named window rather than an inline spec.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS|RANGE [BETWEEN] start [AND end].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window specification."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary: UNBOUNDED / CURRENT ROW / <expr>, plus its side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias (or parenthesized alias list) following `this`.

        With `explicit=True`, only accepts an alias introduced by AS.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, or any token allowed in identifier position."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal (plain or raw), falling back to a placeholder."""
        if self._match_set((TokenType.STRING, TokenType.RAW_STRING)):
            return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and convert it into a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any/specified tokens) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a var, falling back to a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, falling back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE/FALSE, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a star (*) token, falling back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference such as @name or ${name[:part]}."""
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try each registered placeholder parser; rewind the match on failure."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse EXCEPT (col, ...) or EXCEPT col as a column-exclusion list."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse REPLACE (expr, ...) or REPLACE expr as a column-replacement list."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, skipping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain: token type -> expression class."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses (required unless `optional`)."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement or, failing that, a (possibly aliased) expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] TRANSACTION|WORK with optional mode lists."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # Each mode is a run of VAR tokens joined by spaces (e.g. READ ONLY).
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT x] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword was already consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <name-or-string>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <field def> [FIRST|AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP action in ALTER TABLE, defaulting its kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse one ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action."""
        this = None
        # The introducing token was already consumed by the caller.
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD action(s) of an ALTER TABLE: constraints or columns."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER [COLUMN] col {DROP DEFAULT | SET DEFAULT expr | [SET DATA] TYPE ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP action(s) of an ALTER TABLE: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse RENAME [TO] <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; unsupported forms fall back to a raw exp.Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only produce an AlterTable if the whole statement was consumed;
            # otherwise fall back to a raw command.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON cond WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] ... THEN clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: False for BY TARGET, True for BY SOURCE, False when absent.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the dialect's registered SHOW parsers."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` / `name TO value` item of a SET statement."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via the dialect's SET parsers, else a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse SET item, ...; unparsed leftovers demote the result to a command."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_
    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        # Return a Var for the first (possibly multi-word) option that matches
        # the upcoming tokens; None if nothing matches.
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim and wrap it in a Command.

        The raw SQL is split into the leading keyword (`this`) and everything
        after it (`expression`), based on the length of the start token.
        """
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property of the form `<kind>([<key> <value>]...)`.

        NOTE(review): presumably ClickHouse-style dictionary DDL — confirm
        against the dialects that register this parser.
        """
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                # Stop once neither a key nor a value can be parsed.
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse `(MIN <min> MAX <max>)` or `(MAX <max>)`; MIN defaults to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        # Parse `<expr> IN <iterator> [IF <condition>]` after `this`.
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Rewinds one token *before* the saved position — presumably to
            # also un-consume a token matched by the caller; confirm intent.
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the parser keyed by the keyword sequence at the cursor.

        Walks the trie token by token, advancing the cursor. On a full match
        the cursor is left past the keywords and the parser is returned; on
        failure the cursor is restored and None is returned.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # True (advancing if requested, and attaching buffered comments to
        # `expression`) when the current token matches; None otherwise.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Like _match, but accepts any token type in `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match two consecutive token types; advances past both on success.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require a "(", raising a parse error otherwise.
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require a ")", raising a parse error otherwise.
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Case-insensitive match of the current token's text against `texts`.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        # Match an exact sequence of token texts (case-insensitive). The
        # cursor is restored on failure, or when advance=False.
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        # Recursively rewrite qualified Column nodes into Dot chains.
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace columns named after lambda variables with identifiers/dots.

        Returns the (possibly replaced) root node.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Climb to the outermost Dot and replace it; the `else`
                # branch runs only when the while loop never breaks, i.e.
                # the column is not nested inside a Dot chain.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
The Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
982 def __init__( 983 self, 984 error_level: t.Optional[ErrorLevel] = None, 985 error_message_context: int = 100, 986 max_errors: int = 3, 987 dialect: DialectType = None, 988 ): 989 from sqlglot.dialects import Dialect 990 991 self.error_level = error_level or ErrorLevel.IMMEDIATE 992 self.error_message_context = error_message_context 993 self.max_errors = max_errors 994 self.dialect = Dialect.get_or_raise(dialect) 995 self.reset()
1007 def parse( 1008 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1009 ) -> t.List[t.Optional[exp.Expression]]: 1010 """ 1011 Parses a list of tokens and returns a list of syntax trees, one tree 1012 per parsed SQL statement. 1013 1014 Args: 1015 raw_tokens: The list of tokens. 1016 sql: The original SQL string, used to produce helpful debug messages. 1017 1018 Returns: 1019 The list of the produced syntax trees. 1020 """ 1021 return self._parse( 1022 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1023 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1025 def parse_into( 1026 self, 1027 expression_types: exp.IntoType, 1028 raw_tokens: t.List[Token], 1029 sql: t.Optional[str] = None, 1030 ) -> t.List[t.Optional[exp.Expression]]: 1031 """ 1032 Parses a list of tokens into a given Expression type. If a collection of Expression 1033 types is given instead, this method will try to parse the token list into each one 1034 of them, stopping at the first for which the parsing succeeds. 1035 1036 Args: 1037 expression_types: The expression type(s) to try and parse the token list into. 1038 raw_tokens: The list of tokens. 1039 sql: The original SQL string, used to produce helpful debug messages. 1040 1041 Returns: 1042 The target Expression. 1043 """ 1044 errors = [] 1045 for expression_type in ensure_list(expression_types): 1046 parser = self.EXPRESSION_PARSERS.get(expression_type) 1047 if not parser: 1048 raise TypeError(f"No parser registered for {expression_type}") 1049 1050 try: 1051 return self._parse(parser, raw_tokens, sql) 1052 except ParseError as e: 1053 e.errors[0]["into_expression"] = expression_type 1054 errors.append(e) 1055 1056 raise ParseError( 1057 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1058 errors=merge_errors(errors), 1059 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1096 def check_errors(self) -> None: 1097 """Logs or raises any found errors, depending on the chosen error level setting.""" 1098 if self.error_level == ErrorLevel.WARN: 1099 for error in self.errors: 1100 logger.error(str(error)) 1101 elif self.error_level == ErrorLevel.RAISE and self.errors: 1102 raise ParseError( 1103 concat_messages(self.errors, self.max_errors), 1104 errors=merge_errors(self.errors), 1105 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        # Anchor the error to the given token, else the current/previous one,
        # else a synthetic empty token so the slicing below still works.
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            # \033[4m / \033[0m underline the offending SQL in terminals.
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1135 def expression( 1136 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1137 ) -> E: 1138 """ 1139 Creates a new, validated Expression. 1140 1141 Args: 1142 exp_class: The expression class to instantiate. 1143 comments: An optional list of comments to attach to the expression. 1144 kwargs: The arguments to set for the expression along with their respective values. 1145 1146 Returns: 1147 The target expression. 1148 """ 1149 instance = exp_class(**kwargs) 1150 instance.add_comments(comments) if comments else self._add_comments(instance) 1151 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1158 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1159 """ 1160 Validates an Expression, making sure that all its mandatory arguments are set. 1161 1162 Args: 1163 expression: The expression to validate. 1164 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1165 1166 Returns: 1167 The validated expression. 1168 """ 1169 if self.error_level != ErrorLevel.IGNORE: 1170 for error_message in expression.error_messages(args): 1171 self.raise_error(error_message) 1172 1173 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.