sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E
    from sqlglot.dialects.dialect import Dialect, DialectType

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
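
# Illustrative sketch (not part of the module itself): how the helpers above
# normalize function arguments. LOG's argument order is settled per dialect by
# LOG_BASE_FIRST, and a lone argument falls back to Ln in dialects that set
# LOG_DEFAULTS_TO_LN.
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT LOG(2, 8)").sql()
#   'SELECT LOG(2, 8)'
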
class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "LOG": parse_logarithm,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }
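    # Illustrative sketch (hypothetical dialect, not part of this module): a
    # dialect's Parser subclass typically extends FUNCTIONS to map a function
    # name onto an Expression builder, e.g.
    #
    #   class MyDialect(Dialect):
    #       class Parser(parser.Parser):
    #           FUNCTIONS = {
    #               **parser.Parser.FUNCTIONS,
    #               "DATE_ADD": lambda args: exp.DateAdd(
    #                   this=seq_get(args, 0), expression=seq_get(args, 1)
    #               ),
    #           }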
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }
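    # Illustrative sketch: the ARROW entry above is what turns a higher-order
    # function argument such as "x -> x + 1" into an exp.Lambda, e.g.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT TRANSFORM(arr, x -> x + 1)", read="spark")
    #
    # ("spark" is just one dialect with lambda-taking functions; the SQL is
    # an arbitrary example, not taken from this module.)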
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }
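    # Illustrative sketch: _parse_statement() (defined below) dispatches on the
    # first token of each statement via STATEMENT_PARSERS, so e.g.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("CREATE TABLE t (a INT)")
    #
    # routes TokenType.CREATE to _parse_create().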
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
684 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 685 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 686 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 687 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 688 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 689 "FREESPACE": lambda self: self._parse_freespace(), 690 "HEAP": lambda self: self.expression(exp.HeapProperty), 691 "IMMUTABLE": lambda self: self.expression( 692 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 693 ), 694 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 695 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 696 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 697 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 698 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 699 "LIKE": lambda self: self._parse_create_like(), 700 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 701 "LOCK": lambda self: self._parse_locking(), 702 "LOCKING": lambda self: self._parse_locking(), 703 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 704 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 705 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 706 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 707 "NO": lambda self: self._parse_no_property(), 708 "ON": lambda self: self._parse_on_property(), 709 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 710 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 711 "PARTITION": lambda self: self._parse_partitioned_of(), 712 "PARTITION BY": lambda self: self._parse_partitioned_by(), 713 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 714 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 715 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 716 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 717 "REMOTE": lambda self: self._parse_remote_with_connection(), 718 "RETURNS": lambda self: self._parse_returns(), 719 "ROW": lambda self: self._parse_row(), 720 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 721 "SAMPLE": lambda self: self.expression( 722 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 723 ), 724 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 725 "SETTINGS": lambda self: self.expression( 726 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 727 ), 728 "SORTKEY": lambda self: self._parse_sortkey(), 729 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 730 "STABLE": lambda self: self.expression( 731 exp.StabilityProperty, this=exp.Literal.string("STABLE") 732 ), 733 "STORED": lambda self: self._parse_stored(), 734 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 735 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 736 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 737 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 738 "TO": lambda self: self._parse_to_table(), 739 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 740 "TRANSFORM": lambda self: self.expression( 741 exp.TransformModelProperty, 
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}
"UNIQUE", "PERIOD"} 815 816 NO_PAREN_FUNCTION_PARSERS = { 817 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 818 "CASE": lambda self: self._parse_case(), 819 "IF": lambda self: self._parse_if(), 820 "NEXT": lambda self: self._parse_next_value_for(), 821 } 822 823 INVALID_FUNC_NAME_TOKENS = { 824 TokenType.IDENTIFIER, 825 TokenType.STRING, 826 } 827 828 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 829 830 FUNCTION_PARSERS = { 831 "ANY_VALUE": lambda self: self._parse_any_value(), 832 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 833 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 834 "DECODE": lambda self: self._parse_decode(), 835 "EXTRACT": lambda self: self._parse_extract(), 836 "JSON_OBJECT": lambda self: self._parse_json_object(), 837 "JSON_TABLE": lambda self: self._parse_json_table(), 838 "MATCH": lambda self: self._parse_match_against(), 839 "OPENJSON": lambda self: self._parse_open_json(), 840 "POSITION": lambda self: self._parse_position(), 841 "PREDICT": lambda self: self._parse_predict(), 842 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 843 "STRING_AGG": lambda self: self._parse_string_agg(), 844 "SUBSTRING": lambda self: self._parse_substring(), 845 "TRIM": lambda self: self._parse_trim(), 846 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 847 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 848 } 849 850 QUERY_MODIFIER_PARSERS = { 851 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 852 TokenType.WHERE: lambda self: ("where", self._parse_where()), 853 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 854 TokenType.HAVING: lambda self: ("having", self._parse_having()), 855 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 856 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 857 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 858 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 859 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 860 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 861 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 862 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 863 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 864 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 865 TokenType.CLUSTER_BY: lambda self: ( 866 "cluster", 867 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 868 ), 869 TokenType.DISTRIBUTE_BY: lambda self: ( 870 "distribute", 871 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 872 ), 873 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 874 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 875 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 876 } 877 878 SET_PARSERS = { 879 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 880 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 881 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 882 "TRANSACTION": lambda self: self._parse_set_transaction(), 883 } 884 885 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 886 887 TYPE_LITERAL_PARSERS = { 888 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 889 } 890 891 MODIFIABLES = (exp.Subquery, exp.Subqueryable, 
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
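    # Illustrative usage sketch (the SQL text is arbitrary): a Parser is fed
    # the token list produced by a Tokenizer, e.g.
    #
    #   >>> from sqlglot import parser, tokens
    #   >>> sql = "SELECT 1"
    #   >>> parser.Parser().parse(tokens.Tokenizer().tokenize(sql), sql)
    #
    # which returns a list with one syntax tree per statement.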
1007 """ 1008 return self._parse( 1009 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1010 ) 1011 1012 def parse_into( 1013 self, 1014 expression_types: exp.IntoType, 1015 raw_tokens: t.List[Token], 1016 sql: t.Optional[str] = None, 1017 ) -> t.List[t.Optional[exp.Expression]]: 1018 """ 1019 Parses a list of tokens into a given Expression type. If a collection of Expression 1020 types is given instead, this method will try to parse the token list into each one 1021 of them, stopping at the first for which the parsing succeeds. 1022 1023 Args: 1024 expression_types: The expression type(s) to try and parse the token list into. 1025 raw_tokens: The list of tokens. 1026 sql: The original SQL string, used to produce helpful debug messages. 1027 1028 Returns: 1029 The target Expression. 1030 """ 1031 errors = [] 1032 for expression_type in ensure_list(expression_types): 1033 parser = self.EXPRESSION_PARSERS.get(expression_type) 1034 if not parser: 1035 raise TypeError(f"No parser registered for {expression_type}") 1036 1037 try: 1038 return self._parse(parser, raw_tokens, sql) 1039 except ParseError as e: 1040 e.errors[0]["into_expression"] = expression_type 1041 errors.append(e) 1042 1043 raise ParseError( 1044 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1045 errors=merge_errors(errors), 1046 ) from errors[-1] 1047 1048 def _parse( 1049 self, 1050 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1051 raw_tokens: t.List[Token], 1052 sql: t.Optional[str] = None, 1053 ) -> t.List[t.Optional[exp.Expression]]: 1054 self.reset() 1055 self.sql = sql or "" 1056 1057 total = len(raw_tokens) 1058 chunks: t.List[t.List[Token]] = [[]] 1059 1060 for i, token in enumerate(raw_tokens): 1061 if token.token_type == TokenType.SEMICOLON: 1062 if i < total - 1: 1063 chunks.append([]) 1064 else: 1065 chunks[-1].append(token) 1066 1067 expressions = [] 1068 1069 for tokens in chunks: 1070 self._index = -1 1071 self._tokens = tokens 1072 self._advance() 1073 1074 expressions.append(parse_method(self)) 1075 1076 if self._index < len(self._tokens): 1077 self.raise_error("Invalid expression / Unexpected token") 1078 1079 self.check_errors() 1080 1081 return expressions 1082 1083 def check_errors(self) -> None: 1084 """Logs or raises any found errors, depending on the chosen error level setting.""" 1085 if self.error_level == ErrorLevel.WARN: 1086 for error in self.errors: 1087 logger.error(str(error)) 1088 elif self.error_level == ErrorLevel.RAISE and self.errors: 1089 raise ParseError( 1090 concat_messages(self.errors, self.max_errors), 1091 errors=merge_errors(self.errors), 1092 ) 1093 1094 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1095 """ 1096 Appends an error in the list of recorded errors or raises it, depending on the chosen 1097 error level setting. 1098 """ 1099 token = token or self._curr or self._prev or Token.string("") 1100 start = token.start 1101 end = token.end + 1 1102 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1103 highlight = self.sql[start:end] 1104 end_context = self.sql[end : end + self.error_message_context] 1105 1106 error = ParseError.new( 1107 f"{message}. 
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )
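    # Illustrative sketch (hypothetical call, not taken from this module):
    # self.expression() validates mandatory args via validate_expression, so
    # building a node with a required arg missing, e.g.
    #
    #   parser_instance.expression(exp.Like, this=exp.column("x"))
    #
    # records or raises an error for the absent `expression` arg, depending on
    # error_level.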
    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
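    # Illustrative sketch: _parse_ttl above covers ClickHouse clauses such as
    #
    #   TTL d + INTERVAL 1 MONTH DELETE,
    #       d + INTERVAL 1 WEEK TO VOLUME 'aaa',
    #       d + INTERVAL 2 WEEK TO DISK 'bbb'
    #
    # (example adapted from the ClickHouse docs linked above).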
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)
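    # Illustrative sketch: PROPERTY_PARSERS plus the generic key=value fallback
    # in _parse_property handle DDL options, so e.g.
    #
    #   CREATE TABLE t (a INT) ENGINE=MergeTree
    #
    # maps "ENGINE" onto an exp.EngineProperty via _parse_property_assignment,
    # while an unrecognized `key = value` pair becomes a plain exp.Property.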
    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
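    # Illustrative sketch: the helpers above parse Teradata-style table
    # options, e.g. a CREATE TABLE carrying clauses such as
    #
    #   FALLBACK, CHECKSUM = DEFAULT, DATABLOCKSIZE = 128 BYTES,
    #   WITH CONCURRENT ISOLATED LOADING FOR ALL
    #
    # with each clause mapping onto its corresponding *Property expression.
    # (The exact clause combination is an arbitrary example.)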
self._match_text_seq("ACCESS"): 1754 lock_type = "ACCESS" 1755 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1756 lock_type = "EXCLUSIVE" 1757 elif self._match_text_seq("SHARE"): 1758 lock_type = "SHARE" 1759 elif self._match_text_seq("READ"): 1760 lock_type = "READ" 1761 elif self._match_text_seq("WRITE"): 1762 lock_type = "WRITE" 1763 elif self._match_text_seq("CHECKSUM"): 1764 lock_type = "CHECKSUM" 1765 else: 1766 lock_type = None 1767 1768 override = self._match_text_seq("OVERRIDE") 1769 1770 return self.expression( 1771 exp.LockingProperty, 1772 this=this, 1773 kind=kind, 1774 for_or_in=for_or_in, 1775 lock_type=lock_type, 1776 override=override, 1777 ) 1778 1779 def _parse_partition_by(self) -> t.List[exp.Expression]: 1780 if self._match(TokenType.PARTITION_BY): 1781 return self._parse_csv(self._parse_conjunction) 1782 return [] 1783 1784 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1785 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1786 if self._match_text_seq("MINVALUE"): 1787 return exp.var("MINVALUE") 1788 if self._match_text_seq("MAXVALUE"): 1789 return exp.var("MAXVALUE") 1790 return self._parse_bitwise() 1791 1792 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1793 expression = None 1794 from_expressions = None 1795 to_expressions = None 1796 1797 if self._match(TokenType.IN): 1798 this = self._parse_wrapped_csv(self._parse_bitwise) 1799 elif self._match(TokenType.FROM): 1800 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1801 self._match_text_seq("TO") 1802 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1803 elif self._match_text_seq("WITH", "(", "MODULUS"): 1804 this = self._parse_number() 1805 self._match_text_seq(",", "REMAINDER") 1806 expression = self._parse_number() 1807 self._match_r_paren() 1808 else: 1809 self.raise_error("Failed to parse partition bound spec.") 1810 1811 return self.expression( 1812 exp.PartitionBoundSpec, 1813 this=this, 1814 expression=expression, 1815 from_expressions=from_expressions, 1816 to_expressions=to_expressions, 1817 ) 1818 1819 # https://www.postgresql.org/docs/current/sql-createtable.html 1820 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1821 if not self._match_text_seq("OF"): 1822 self._retreat(self._index - 1) 1823 return None 1824 1825 this = self._parse_table(schema=True) 1826 1827 if self._match(TokenType.DEFAULT): 1828 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1829 elif self._match_text_seq("FOR", "VALUES"): 1830 expression = self._parse_partition_bound_spec() 1831 else: 1832 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1833 1834 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1835 1836 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1837 self._match(TokenType.EQ) 1838 return self.expression( 1839 exp.PartitionedByProperty, 1840 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1841 ) 1842 1843 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1844 if self._match_text_seq("AND", "STATISTICS"): 1845 statistics = True 1846 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1847 statistics = False 1848 else: 1849 statistics = None 1850 1851 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1852 1853 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1854 if self._match_text_seq("PRIMARY", "INDEX"): 1855 return 
    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )
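
    # Editor's note: illustrative sketch (not part of the original module). An
    # "INSERT OR REPLACE" alternative and a RETURNING clause both flow through
    # _parse_insert; the assertions are assumptions about the AST shape:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one(
    #       "INSERT OR REPLACE INTO t (a) VALUES (1) RETURNING a", dialect="sqlite"
    #   )
    #   # ast.args.get("alternative") should be "REPLACE", and
    #   # ast.args.get("returning") an exp.Returning node.
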
    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
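
    # Editor's note: illustrative sketch (not part of the original module).
    # Both the Postgres and MySQL upsert spellings land in _parse_on_conflict:
    #
    #   import sqlglot
    #   pg = sqlglot.parse_one(
    #       "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING", dialect="postgres"
    #   )
    #   # pg.args["conflict"] should be an exp.OnConflict with nothing=True.
    #
    #   my = sqlglot.parse_one(
    #       "INSERT INTO t (a) VALUES (1) ON DUPLICATE KEY UPDATE a = 2", dialect="mysql"
    #   )
    #   # my.args["conflict"].args.get("duplicate") should be truthy.
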
"AS"): 2063 kwargs["null"] = self._parse_string() 2064 2065 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2066 2067 def _parse_load(self) -> exp.LoadData | exp.Command: 2068 if self._match_text_seq("DATA"): 2069 local = self._match_text_seq("LOCAL") 2070 self._match_text_seq("INPATH") 2071 inpath = self._parse_string() 2072 overwrite = self._match(TokenType.OVERWRITE) 2073 self._match_pair(TokenType.INTO, TokenType.TABLE) 2074 2075 return self.expression( 2076 exp.LoadData, 2077 this=self._parse_table(schema=True), 2078 local=local, 2079 overwrite=overwrite, 2080 inpath=inpath, 2081 partition=self._parse_partition(), 2082 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2083 serde=self._match_text_seq("SERDE") and self._parse_string(), 2084 ) 2085 return self._parse_as_command(self._prev) 2086 2087 def _parse_delete(self) -> exp.Delete: 2088 # This handles MySQL's "Multiple-Table Syntax" 2089 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2090 tables = None 2091 comments = self._prev_comments 2092 if not self._match(TokenType.FROM, advance=False): 2093 tables = self._parse_csv(self._parse_table) or None 2094 2095 returning = self._parse_returning() 2096 2097 return self.expression( 2098 exp.Delete, 2099 comments=comments, 2100 tables=tables, 2101 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2102 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2103 where=self._parse_where(), 2104 returning=returning or self._parse_returning(), 2105 limit=self._parse_limit(), 2106 ) 2107 2108 def _parse_update(self) -> exp.Update: 2109 comments = self._prev_comments 2110 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2111 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2112 returning = self._parse_returning() 2113 return self.expression( 2114 exp.Update, 2115 comments=comments, 2116 **{ # type: ignore 2117 "this": this, 2118 "expressions": expressions, 2119 "from": self._parse_from(joins=True), 2120 "where": self._parse_where(), 2121 "returning": returning or self._parse_returning(), 2122 "order": self._parse_order(), 2123 "limit": self._parse_limit(), 2124 }, 2125 ) 2126 2127 def _parse_uncache(self) -> exp.Uncache: 2128 if not self._match(TokenType.TABLE): 2129 self.raise_error("Expecting TABLE after UNCACHE") 2130 2131 return self.expression( 2132 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2133 ) 2134 2135 def _parse_cache(self) -> exp.Cache: 2136 lazy = self._match_text_seq("LAZY") 2137 self._match(TokenType.TABLE) 2138 table = self._parse_table(schema=True) 2139 2140 options = [] 2141 if self._match_text_seq("OPTIONS"): 2142 self._match_l_paren() 2143 k = self._parse_string() 2144 self._match(TokenType.EQ) 2145 v = self._parse_string() 2146 options = [k, v] 2147 self._match_r_paren() 2148 2149 self._match(TokenType.ALIAS) 2150 return self.expression( 2151 exp.Cache, 2152 this=table, 2153 lazy=lazy, 2154 options=options, 2155 expression=self._parse_select(nested=True), 2156 ) 2157 2158 def _parse_partition(self) -> t.Optional[exp.Partition]: 2159 if not self._match(TokenType.PARTITION): 2160 return None 2161 2162 return self.expression( 2163 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2164 ) 2165 2166 def _parse_value(self) -> exp.Tuple: 2167 if self._match(TokenType.L_PAREN): 2168 expressions = self._parse_csv(self._parse_conjunction) 2169 self._match_r_paren() 
    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In Presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # DuckDB supports a leading FROM clause, e.g. FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this
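
    # Editor's note: illustrative sketch (not part of the original module) of the
    # leading-FROM branch noted above:
    #
    #   import sqlglot
    #   sqlglot.parse_one("FROM tbl", read="duckdb").sql()
    #   # expected to normalize to 'SELECT * FROM tbl'
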
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )
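
    # Editor's note: illustrative sketch (not part of the original module).
    # _parse_with/_parse_cte accept both comma-separated and repeated-WITH CTEs:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("WITH a AS (SELECT 1), b AS (SELECT 2) SELECT * FROM a, b")
    #   # ast.args["with"] should be an exp.With holding two exp.CTE nodes.
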
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
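
    # Editor's note: illustrative sketch (not part of the original module).
    # _parse_join treats a bare comma as a join, and OUTER APPLY is normalized
    # to a LEFT side (see the Token(TokenType.LEFT, "LEFT") above):
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT * FROM a, b")
    #   # ast.args["joins"] should contain one exp.Join wrapping table b.
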
    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema) or ""

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
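
    # Editor's note: illustrative sketch (not part of the original module).
    # Both the ordinality and offset spellings end up in exp.Unnest's "offset" arg:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one(
    #       "SELECT * FROM UNNEST(arr) AS x WITH OFFSET AS pos", dialect="bigquery"
    #   )
    #   # ast.find(sqlglot.exp.Unnest).args.get("offset") should be the pos identifier.
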
    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore
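
    # Editor's note: illustrative sketch (not part of the original module).
    # _parse_group collects plain keys, GROUPING SETS, ROLLUP/CUBE (wrapped or
    # trailing WITH ROLLUP) and GROUP BY ALL into a single exp.Group:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one(
    #       "SELECT a, SUM(b) FROM t GROUP BY a WITH ROLLUP", dialect="mysql"
    #   )
    #   # ast.args["group"].args.get("rollup") should be truthy (the bare WITH form).
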
    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        # Matching ASC first ensures desc ends up as an explicit False (not None)
        # when the key is ascending
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
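
    # Editor's note: illustrative sketch (not part of the original module). With
    # the default dialect (NULL_ORDERING "nulls_are_small"), an ascending key is
    # normalized to nulls_first=True by the logic above:
    #
    #   import sqlglot
    #   ordered = sqlglot.parse_one("SELECT x FROM t ORDER BY x").find(sqlglot.exp.Ordered)
    #   # ordered.args.get("nulls_first") should be True; with ORDER BY x DESC it
    #   # should be False.
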
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
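
    # Editor's note: illustrative sketch (not part of the original module). The
    # comma form above parses "LIMIT 5, 10" as offset 5 / limit 10, which
    # _parse_query_modifiers then splits into separate Limit and Offset nodes:
    #
    #   import sqlglot
    #   sqlglot.transpile("SELECT x FROM t LIMIT 5, 10", read="mysql", write="postgres")
    #   # expected: ['SELECT x FROM t LIMIT 10 OFFSET 5']
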
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            comments=self._prev.comments,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(
                self._parse_select(nested=True, parse_set_operation=False)
            ),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this
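
    # Editor's note: illustrative sketch (not part of the original module) of the
    # Postgres shorthands handled in _parse_range:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT a NOTNULL FROM t", read="postgres").sql()
    #   # expected to normalize to something like 'SELECT NOT a IS NULL FROM t'
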
    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)
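
    # Editor's note: illustrative sketch (not part of the original module) of the
    # canonicalization described above:
    #
    #   import sqlglot
    #   iv = sqlglot.parse_one("SELECT INTERVAL '5 day'").find(sqlglot.exp.Interval)
    #   # iv.this should be the string literal '5' and iv.args.get("unit") the Var
    #   # "day", i.e. the same shape as INTERVAL '5' day.
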
False, schema: bool = False, allow_identifiers: bool = True 3488 ) -> t.Optional[exp.Expression]: 3489 index = self._index 3490 3491 prefix = self._match_text_seq("SYSUDTLIB", ".") 3492 3493 if not self._match_set(self.TYPE_TOKENS): 3494 identifier = allow_identifiers and self._parse_id_var( 3495 any_token=False, tokens=(TokenType.VAR,) 3496 ) 3497 3498 if identifier: 3499 tokens = self.dialect.tokenize(identifier.name) 3500 3501 if len(tokens) != 1: 3502 self.raise_error("Unexpected identifier", self._prev) 3503 3504 if tokens[0].token_type in self.TYPE_TOKENS: 3505 self._prev = tokens[0] 3506 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3507 type_name = identifier.name 3508 3509 while self._match(TokenType.DOT): 3510 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3511 3512 return exp.DataType.build(type_name, udt=True) 3513 else: 3514 return None 3515 else: 3516 return None 3517 3518 type_token = self._prev.token_type 3519 3520 if type_token == TokenType.PSEUDO_TYPE: 3521 return self.expression(exp.PseudoType, this=self._prev.text) 3522 3523 if type_token == TokenType.OBJECT_IDENTIFIER: 3524 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3525 3526 nested = type_token in self.NESTED_TYPE_TOKENS 3527 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3528 expressions = None 3529 maybe_func = False 3530 3531 if self._match(TokenType.L_PAREN): 3532 if is_struct: 3533 expressions = self._parse_csv(self._parse_struct_types) 3534 elif nested: 3535 expressions = self._parse_csv( 3536 lambda: self._parse_types( 3537 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3538 ) 3539 ) 3540 elif type_token in self.ENUM_TYPE_TOKENS: 3541 expressions = self._parse_csv(self._parse_equality) 3542 else: 3543 expressions = self._parse_csv(self._parse_type_size) 3544 3545 if not expressions or not self._match(TokenType.R_PAREN): 3546 self._retreat(index) 3547 return None 3548 3549 maybe_func = True 3550 3551 this: t.Optional[exp.Expression] = None 3552 values: t.Optional[t.List[exp.Expression]] = None 3553 3554 if nested and self._match(TokenType.LT): 3555 if is_struct: 3556 expressions = self._parse_csv(self._parse_struct_types) 3557 else: 3558 expressions = self._parse_csv( 3559 lambda: self._parse_types( 3560 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3561 ) 3562 ) 3563 3564 if not self._match(TokenType.GT): 3565 self.raise_error("Expecting >") 3566 3567 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3568 values = self._parse_csv(self._parse_conjunction) 3569 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3570 3571 if type_token in self.TIMESTAMPS: 3572 if self._match_text_seq("WITH", "TIME", "ZONE"): 3573 maybe_func = False 3574 tz_type = ( 3575 exp.DataType.Type.TIMETZ 3576 if type_token in self.TIMES 3577 else exp.DataType.Type.TIMESTAMPTZ 3578 ) 3579 this = exp.DataType(this=tz_type, expressions=expressions) 3580 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3581 maybe_func = False 3582 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3583 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3584 maybe_func = False 3585 elif type_token == TokenType.INTERVAL: 3586 unit = self._parse_var() 3587 3588 if self._match_text_seq("TO"): 3589 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3590 else: 3591 span = None 3592 3593 if span or not unit: 3594 this = self.expression( 3595 exp.DataType, 

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
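
    # Illustrative sketch: _parse_types handles both parenthesized type parameters
    # and angle-bracket nesting. Assuming the public sqlglot API, something like the
    # following round-trips (exact output depends on the dialect and version):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CAST(x AS DECIMAL(10, 2))").sql()
    #     'CAST(x AS DECIMAL(10, 2))'
    #     >>> sqlglot.parse_one("CAST(x AS ARRAY<INT64>)", read="bigquery").sql("bigquery")
    #     'CAST(x AS ARRAY<INT64>)'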

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
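
    # Illustrative sketch: the {fn ...} ODBC escape wrapper above is recognized and
    # then discarded, so (assuming the public API) something like
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}", read="mysql")
    #
    # should yield essentially the same tree as parsing SELECT CONCAT('a', 'b').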

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
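
    # Illustrative sketch of the fallback in _parse_function_call above: function
    # names with no registered builder are preserved as exp.Anonymous rather than
    # rejected, so unknown UDFs still round-trip:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT MY_UDF(1, 2)").sql()
    #     'SELECT MY_UDF(1, 2)'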

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)
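
    # Illustrative sketch: _parse_generated_as_identity above accepts the standard
    # identity options. Assuming the public API, a round trip might look like:
    #
    #     >>> import sqlglot
    #     >>> sql = "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2))"
    #     >>> sqlglot.parse_one(sql, read="postgres").sql("postgres")
    #
    # with the start/increment values stored on the resulting
    # GeneratedAsIdentityColumnConstraint node.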

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint:
        self._match(TokenType.TIMESTAMP_SNAPSHOT)

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )
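
    # Illustrative sketch: _parse_foreign_key above collects the ON DELETE /
    # ON UPDATE actions into keyword args of exp.ForeignKey, e.g. (assuming the
    # public API):
    #
    #     >>> import sqlglot
    #     >>> sql = "CREATE TABLE t (x INT, FOREIGN KEY (x) REFERENCES u (y) ON DELETE CASCADE)"
    #     >>> fk = sqlglot.parse_one(sql).find(sqlglot.exp.ForeignKey)
    #     >>> fk.args.get("delete")
    #     'CASCADE'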

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )
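
    # Illustrative sketch for _parse_bracket above: brace literals become structs
    # (the DuckDB syntax linked in the comment), while subscripts are adjusted by
    # the dialect's INDEX_OFFSET. Assuming the public API:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT {'a': 1}", read="duckdb").find(sqlglot.exp.Struct) is not None
    #     True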

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )
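
    # Illustrative sketch: when a FORMAT clause follows a CAST to a temporal type,
    # _parse_cast above rewrites the cast into StrToDate / StrToTime. With
    # Teradata-style syntax, for example:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CAST(x AS DATE FORMAT 'YYYY-MM-DD')", read="teradata")
    #
    # should produce an exp.StrToDate node rather than a plain exp.Cast.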

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
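
    # Illustrative sketch: the manual WITHIN GROUP handling in _parse_string_agg
    # above keeps STRING_AGG close to GROUP_CONCAT, so (assuming the public API)
    # something like
    #
    #     >>> import sqlglot
    #     >>> sqlglot.transpile(
    #     ...     "SELECT STRING_AGG(x, ',' ORDER BY y) FROM t", read="postgres", write="mysql"
    #     ... )
    #
    # should emit a GROUP_CONCAT(... ORDER BY ... SEPARATOR ...) expression.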
4443 """ 4444 args = self._parse_csv(self._parse_conjunction) 4445 4446 if len(args) < 3: 4447 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4448 4449 expression, *expressions = args 4450 if not expression: 4451 return None 4452 4453 ifs = [] 4454 for search, result in zip(expressions[::2], expressions[1::2]): 4455 if not search or not result: 4456 return None 4457 4458 if isinstance(search, exp.Literal): 4459 ifs.append( 4460 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4461 ) 4462 elif isinstance(search, exp.Null): 4463 ifs.append( 4464 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4465 ) 4466 else: 4467 cond = exp.or_( 4468 exp.EQ(this=expression.copy(), expression=search), 4469 exp.and_( 4470 exp.Is(this=expression.copy(), expression=exp.Null()), 4471 exp.Is(this=search.copy(), expression=exp.Null()), 4472 copy=False, 4473 ), 4474 copy=False, 4475 ) 4476 ifs.append(exp.If(this=cond, true=result)) 4477 4478 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4479 4480 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4481 self._match_text_seq("KEY") 4482 key = self._parse_column() 4483 self._match_set((TokenType.COLON, TokenType.COMMA)) 4484 self._match_text_seq("VALUE") 4485 value = self._parse_bitwise() 4486 4487 if not key and not value: 4488 return None 4489 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4490 4491 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4492 if not this or not self._match_text_seq("FORMAT", "JSON"): 4493 return this 4494 4495 return self.expression(exp.FormatJson, this=this) 4496 4497 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4498 # Parses the "X ON Y" syntax, i.e. 

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)
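
    # Illustrative sketch: _parse_position above normalizes both argument orders
    # into exp.StrPosition(this=haystack, substr=needle), so the IN form and the
    # comma form should produce equivalent trees (assuming the public API):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT POSITION('x' IN y)").find(sqlglot.exp.StrPosition)
    #     >>> sqlglot.parse_one("SELECT STRPOS(y, 'x')", read="postgres").find(sqlglot.exp.StrPosition)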

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.STRING, TokenType.RAW_STRING)):
            return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_TOKENS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()
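
    # Illustrative sketch: _parse_window_spec above returns a plain dict and
    # _parse_window stitches two of them into an exp.WindowSpec, so a frame like
    #
    #     >>> import sqlglot
    #     >>> sql = "SELECT SUM(x) OVER (ORDER BY y ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
    #     >>> sqlglot.parse_one(sql).find(sqlglot.exp.WindowSpec)
    #
    # should carry kind='ROWS', start='UNBOUNDED', start_side='PRECEDING' and
    # end='CURRENT ROW' (with no end side).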

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )
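
    # Illustrative sketch: _parse_tokens above is the generic left-associative
    # climber used by _parse_term / _parse_factor, so "a - b - c" groups as
    # (a - b) - c, i.e. Sub(this=Sub(this=a, expression=b), expression=c):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("a - b - c").sql()
    #     'a - b - c'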

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )
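
    # Illustrative sketch: _parse_add_column above also captures Databricks-style
    # column positions, e.g. (assuming the public API):
    #
    #     >>> import sqlglot
    #     >>> sql = "ALTER TABLE t ADD COLUMN y INT AFTER x"
    #     >>> sqlglot.parse_one(sql, read="databricks").find(sqlglot.exp.ColumnPosition)
    #
    # should be populated with position='AFTER' and the reference column x.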

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_
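
    # Illustrative sketch: _parse_set above retreats and falls back to a generic
    # exp.Command whenever any token is left unconsumed, which keeps vendor-specific
    # SET syntax round-trippable even when it is not fully modeled:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SET @@session.sql_mode = 'TRADITIONAL'", read="mysql")
    #
    # may come back as either an exp.Set or an exp.Command, but .sql() should
    # reproduce the statement in both cases.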
_parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5319 for option in options: 5320 if self._match_text_seq(*option.split(" ")): 5321 return exp.var(option) 5322 return None 5323 5324 def _parse_as_command(self, start: Token) -> exp.Command: 5325 while self._curr: 5326 self._advance() 5327 text = self._find_sql(start, self._prev) 5328 size = len(start.text) 5329 return exp.Command(this=text[:size], expression=text[size:]) 5330 5331 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5332 settings = [] 5333 5334 self._match_l_paren() 5335 kind = self._parse_id_var() 5336 5337 if self._match(TokenType.L_PAREN): 5338 while True: 5339 key = self._parse_id_var() 5340 value = self._parse_primary() 5341 5342 if not key and value is None: 5343 break 5344 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5345 self._match(TokenType.R_PAREN) 5346 5347 self._match_r_paren() 5348 5349 return self.expression( 5350 exp.DictProperty, 5351 this=this, 5352 kind=kind.this if kind else None, 5353 settings=settings, 5354 ) 5355 5356 def _parse_dict_range(self, this: str) -> exp.DictRange: 5357 self._match_l_paren() 5358 has_min = self._match_text_seq("MIN") 5359 if has_min: 5360 min = self._parse_var() or self._parse_primary() 5361 self._match_text_seq("MAX") 5362 max = self._parse_var() or self._parse_primary() 5363 else: 5364 max = self._parse_var() or self._parse_primary() 5365 min = exp.Literal.number(0) 5366 self._match_r_paren() 5367 return self.expression(exp.DictRange, this=this, min=min, max=max) 5368 5369 def _parse_comprehension( 5370 self, this: t.Optional[exp.Expression] 5371 ) -> t.Optional[exp.Comprehension]: 5372 index = self._index 5373 expression = self._parse_column() 5374 if not self._match(TokenType.IN): 5375 self._retreat(index - 1) 5376 return None 5377 iterator = self._parse_column() 5378 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5379 return self.expression( 5380 exp.Comprehension, 5381 this=this, 5382 expression=expression, 5383 iterator=iterator, 5384 condition=condition, 5385 ) 5386 5387 def _find_parser( 5388 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5389 ) -> t.Optional[t.Callable]: 5390 if not self._curr: 5391 return None 5392 5393 index = self._index 5394 this = [] 5395 while True: 5396 # The current token might be multiple words 5397 curr = self._curr.text.upper() 5398 key = curr.split(" ") 5399 this.append(curr) 5400 5401 self._advance() 5402 result, trie = in_trie(trie, key) 5403 if result == TrieResult.FAILED: 5404 break 5405 5406 if result == TrieResult.EXISTS: 5407 subparser = parsers[" ".join(this)] 5408 return subparser 5409 5410 self._retreat(index) 5411 return None 5412 5413 def _match(self, token_type, advance=True, expression=None): 5414 if not self._curr: 5415 return None 5416 5417 if self._curr.token_type == token_type: 5418 if advance: 5419 self._advance() 5420 self._add_comments(expression) 5421 return True 5422 5423 return None 5424 5425 def _match_set(self, types, advance=True): 5426 if not self._curr: 5427 return None 5428 5429 if self._curr.token_type in types: 5430 if advance: 5431 self._advance() 5432 return True 5433 5434 return None 5435 5436 def _match_pair(self, token_type_a, token_type_b, advance=True): 5437 if not self._curr or not self._next: 5438 return None 5439 5440 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5441 if advance: 5442 self._advance(2) 5443 return True 5444 5445 return None 5446 5447 
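The matching helpers above (_match, _match_set, _match_pair, and the trie-driven _find_parser) share one cursor discipline: record self._index, consume tokens optimistically, and _retreat to the saved index on failure, so an unsuccessful probe leaves the token stream exactly where it started. _find_parser builds on this to match multi-word keys one token at a time against tries such as SHOW_TRIE and SET_TRIE. A minimal sketch of the probe-and-retreat pattern over a toy word list (a simplified re-implementation for illustration, not the library's Token machinery):

    class Cursor:
        """Toy cursor mimicking the Parser's match/advance/_retreat discipline."""

        def __init__(self, words):
            self.words = words
            self.index = 0

        def match_seq(self, *texts):
            start = self.index  # remember where the probe began
            for text in texts:
                if self.index < len(self.words) and self.words[self.index].upper() == text:
                    self.index += 1  # optimistic consume
                else:
                    self.index = start  # retreat: failed probes have no side effects
                    return False
            return True

    cur = Cursor(["isolation", "level", "read", "committed"])
    assert not cur.match_seq("ISOLATION", "LEVEL", "SNAPSHOT")
    assert cur.index == 0  # the failed probe left the cursor untouched
    assert cur.match_seq("ISOLATION", "LEVEL", "READ", "COMMITTED")
    assert cur.index == 4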
def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5448 if not self._match(TokenType.L_PAREN, expression=expression): 5449 self.raise_error("Expecting (") 5450 5451 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5452 if not self._match(TokenType.R_PAREN, expression=expression): 5453 self.raise_error("Expecting )") 5454 5455 def _match_texts(self, texts, advance=True): 5456 if self._curr and self._curr.text.upper() in texts: 5457 if advance: 5458 self._advance() 5459 return True 5460 return False 5461 5462 def _match_text_seq(self, *texts, advance=True): 5463 index = self._index 5464 for text in texts: 5465 if self._curr and self._curr.text.upper() == text: 5466 self._advance() 5467 else: 5468 self._retreat(index) 5469 return False 5470 5471 if not advance: 5472 self._retreat(index) 5473 5474 return True 5475 5476 @t.overload 5477 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5478 ... 5479 5480 @t.overload 5481 def _replace_columns_with_dots( 5482 self, this: t.Optional[exp.Expression] 5483 ) -> t.Optional[exp.Expression]: 5484 ... 5485 5486 def _replace_columns_with_dots(self, this): 5487 if isinstance(this, exp.Dot): 5488 exp.replace_children(this, self._replace_columns_with_dots) 5489 elif isinstance(this, exp.Column): 5490 exp.replace_children(this, self._replace_columns_with_dots) 5491 table = this.args.get("table") 5492 this = ( 5493 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5494 ) 5495 5496 return this 5497 5498 def _replace_lambda( 5499 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5500 ) -> t.Optional[exp.Expression]: 5501 if not node: 5502 return node 5503 5504 for column in node.find_all(exp.Column): 5505 if column.parts[0].name in lambda_variables: 5506 dot_or_id = column.to_dot() if column.table else column.this 5507 parent = column.parent 5508 5509 while isinstance(parent, exp.Dot): 5510 if not isinstance(parent.parent, exp.Dot): 5511 parent.replace(dot_or_id) 5512 break 5513 parent = parent.parent 5514 else: 5515 if column is node: 5516 node = dot_or_id 5517 else: 5518 column.replace(dot_or_id) 5519 return node
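That final return closes _replace_lambda and, with it, the module source. In practice the Parser is driven tokens-in, trees-out: tokenize with sqlglot.tokens.Tokenizer, hand the tokens to Parser.parse (which splits the stream on semicolons and produces one syntax tree per statement), and let the configured error_level decide whether problems raise immediately, accumulate, or are logged. A short usage sketch, assuming the default dialect:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; UPDATE t SET a = 1"

    # One tree per semicolon-separated statement; passing sql makes
    # error messages point into the original query text.
    tokens = Tokenizer().tokenize(sql)
    for tree in Parser().parse(tokens, sql):
        print(type(tree).__name__, "->", tree.sql())

The top-level sqlglot.parse and sqlglot.parse_one helpers wrap this same flow and are the usual entry points.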
22def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 23 if len(args) == 1 and args[0].is_star: 24 return exp.StarMap(this=args[0]) 25 26 keys = [] 27 values = [] 28 for i in range(0, len(args), 2): 29 keys.append(args[i]) 30 values.append(args[i + 1]) 31 32 return exp.VarMap( 33 keys=exp.Array(expressions=keys), 34 values=exp.Array(expressions=values), 35 )
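parse_var_map, whose source is repeated above, reads its argument list as alternating key/value pairs, with a lone star argument short-circuiting to StarMap; an odd number of non-star arguments would make the args[i + 1] lookup raise IndexError, so dialects that route functions here are expected to supply pairs. A direct call for illustration only, outside any dialect's VAR_MAP handling (normally this is reached through the FUNCTIONS["VAR_MAP"] entry):

    from sqlglot import exp
    from sqlglot.parser import parse_var_map

    # VAR_MAP('a', 1, 'b', 2): even positions become keys, odd positions values.
    node = parse_var_map(
        [
            exp.Literal.string("a"),
            exp.Literal.number(1),
            exp.Literal.string("b"),
            exp.Literal.number(2),
        ]
    )

    assert isinstance(node, exp.VarMap)
    print([k.sql() for k in node.args["keys"].expressions])    # ["'a'", "'b'"]
    print([v.sql() for v in node.args["values"].expressions])  # ['1', '2']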
51def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 52 # Default argument order is base, expression 53 this = seq_get(args, 0) 54 expression = seq_get(args, 1) 55 56 if expression: 57 if not dialect.LOG_BASE_FIRST: 58 this, expression = expression, this 59 return exp.Log(this=this, expression=expression) 60 61 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
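parse_logarithm is where the two LOG dialect switches meet: LOG_BASE_FIRST controls whether the base is the first or the second argument, and LOG_DEFAULTS_TO_LN (False by default, as the class attribute further below shows) decides whether a single-argument LOG becomes Ln or stays Log. With the default dialect, the base-first order noted in the function's comment applies:

    import sqlglot
    from sqlglot import exp

    # Two arguments: the first is taken as the base under the default order.
    log = sqlglot.parse_one("SELECT LOG(2, 8)").find(exp.Log)
    assert log is not None
    print(log.this.sql(), log.expression.sql())  # 2 8

    # One argument: LOG_DEFAULTS_TO_LN is False here, so the node stays a Log.
    assert sqlglot.parse_one("SELECT LOG(8)").find(exp.Log) is not None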
74class Parser(metaclass=_Parser): 75 """ 76 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 77 78 Args: 79 error_level: The desired error level. 80 Default: ErrorLevel.IMMEDIATE 81 error_message_context: Determines the amount of context to capture from a 82 query string when displaying the error message (in number of characters). 83 Default: 100 84 max_errors: Maximum number of error messages to include in a raised ParseError. 85 This is only relevant if error_level is ErrorLevel.RAISE. 86 Default: 3 87 """ 88 89 FUNCTIONS: t.Dict[str, t.Callable] = { 90 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 91 "CONCAT": lambda args, dialect: exp.Concat( 92 expressions=args, 93 safe=not dialect.STRICT_STRING_CONCAT, 94 coalesce=dialect.CONCAT_COALESCE, 95 ), 96 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 97 expressions=args, 98 safe=not dialect.STRICT_STRING_CONCAT, 99 coalesce=dialect.CONCAT_COALESCE, 100 ), 101 "DATE_TO_DATE_STR": lambda args: exp.Cast( 102 this=seq_get(args, 0), 103 to=exp.DataType(this=exp.DataType.Type.TEXT), 104 ), 105 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 106 "LIKE": parse_like, 107 "LOG": parse_logarithm, 108 "TIME_TO_TIME_STR": lambda args: exp.Cast( 109 this=seq_get(args, 0), 110 to=exp.DataType(this=exp.DataType.Type.TEXT), 111 ), 112 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 113 this=exp.Cast( 114 this=seq_get(args, 0), 115 to=exp.DataType(this=exp.DataType.Type.TEXT), 116 ), 117 start=exp.Literal.number(1), 118 length=exp.Literal.number(10), 119 ), 120 "VAR_MAP": parse_var_map, 121 } 122 123 NO_PAREN_FUNCTIONS = { 124 TokenType.CURRENT_DATE: exp.CurrentDate, 125 TokenType.CURRENT_DATETIME: exp.CurrentDate, 126 TokenType.CURRENT_TIME: exp.CurrentTime, 127 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 128 TokenType.CURRENT_USER: exp.CurrentUser, 129 } 130 131 STRUCT_TYPE_TOKENS = { 132 TokenType.NESTED, 133 TokenType.STRUCT, 134 } 135 136 NESTED_TYPE_TOKENS = { 137 TokenType.ARRAY, 138 TokenType.LOWCARDINALITY, 139 TokenType.MAP, 140 TokenType.NULLABLE, 141 *STRUCT_TYPE_TOKENS, 142 } 143 144 ENUM_TYPE_TOKENS = { 145 TokenType.ENUM, 146 TokenType.ENUM8, 147 TokenType.ENUM16, 148 } 149 150 TYPE_TOKENS = { 151 TokenType.BIT, 152 TokenType.BOOLEAN, 153 TokenType.TINYINT, 154 TokenType.UTINYINT, 155 TokenType.SMALLINT, 156 TokenType.USMALLINT, 157 TokenType.INT, 158 TokenType.UINT, 159 TokenType.BIGINT, 160 TokenType.UBIGINT, 161 TokenType.INT128, 162 TokenType.UINT128, 163 TokenType.INT256, 164 TokenType.UINT256, 165 TokenType.MEDIUMINT, 166 TokenType.UMEDIUMINT, 167 TokenType.FIXEDSTRING, 168 TokenType.FLOAT, 169 TokenType.DOUBLE, 170 TokenType.CHAR, 171 TokenType.NCHAR, 172 TokenType.VARCHAR, 173 TokenType.NVARCHAR, 174 TokenType.TEXT, 175 TokenType.MEDIUMTEXT, 176 TokenType.LONGTEXT, 177 TokenType.MEDIUMBLOB, 178 TokenType.LONGBLOB, 179 TokenType.BINARY, 180 TokenType.VARBINARY, 181 TokenType.JSON, 182 TokenType.JSONB, 183 TokenType.INTERVAL, 184 TokenType.TINYBLOB, 185 TokenType.TINYTEXT, 186 TokenType.TIME, 187 TokenType.TIMETZ, 188 TokenType.TIMESTAMP, 189 TokenType.TIMESTAMP_S, 190 TokenType.TIMESTAMP_MS, 191 TokenType.TIMESTAMP_NS, 192 TokenType.TIMESTAMPTZ, 193 TokenType.TIMESTAMPLTZ, 194 TokenType.DATETIME, 195 TokenType.DATETIME64, 196 TokenType.DATE, 197 TokenType.INT4RANGE, 198 TokenType.INT4MULTIRANGE, 199 TokenType.INT8RANGE, 200 TokenType.INT8MULTIRANGE, 201 TokenType.NUMRANGE, 202 TokenType.NUMMULTIRANGE, 203 
TokenType.TSRANGE, 204 TokenType.TSMULTIRANGE, 205 TokenType.TSTZRANGE, 206 TokenType.TSTZMULTIRANGE, 207 TokenType.DATERANGE, 208 TokenType.DATEMULTIRANGE, 209 TokenType.DECIMAL, 210 TokenType.UDECIMAL, 211 TokenType.BIGDECIMAL, 212 TokenType.UUID, 213 TokenType.GEOGRAPHY, 214 TokenType.GEOMETRY, 215 TokenType.HLLSKETCH, 216 TokenType.HSTORE, 217 TokenType.PSEUDO_TYPE, 218 TokenType.SUPER, 219 TokenType.SERIAL, 220 TokenType.SMALLSERIAL, 221 TokenType.BIGSERIAL, 222 TokenType.XML, 223 TokenType.YEAR, 224 TokenType.UNIQUEIDENTIFIER, 225 TokenType.USERDEFINED, 226 TokenType.MONEY, 227 TokenType.SMALLMONEY, 228 TokenType.ROWVERSION, 229 TokenType.IMAGE, 230 TokenType.VARIANT, 231 TokenType.OBJECT, 232 TokenType.OBJECT_IDENTIFIER, 233 TokenType.INET, 234 TokenType.IPADDRESS, 235 TokenType.IPPREFIX, 236 TokenType.UNKNOWN, 237 TokenType.NULL, 238 *ENUM_TYPE_TOKENS, 239 *NESTED_TYPE_TOKENS, 240 } 241 242 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 243 TokenType.BIGINT: TokenType.UBIGINT, 244 TokenType.INT: TokenType.UINT, 245 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 246 TokenType.SMALLINT: TokenType.USMALLINT, 247 TokenType.TINYINT: TokenType.UTINYINT, 248 TokenType.DECIMAL: TokenType.UDECIMAL, 249 } 250 251 SUBQUERY_PREDICATES = { 252 TokenType.ANY: exp.Any, 253 TokenType.ALL: exp.All, 254 TokenType.EXISTS: exp.Exists, 255 TokenType.SOME: exp.Any, 256 } 257 258 RESERVED_TOKENS = { 259 *Tokenizer.SINGLE_TOKENS.values(), 260 TokenType.SELECT, 261 } 262 263 DB_CREATABLES = { 264 TokenType.DATABASE, 265 TokenType.SCHEMA, 266 TokenType.TABLE, 267 TokenType.VIEW, 268 TokenType.MODEL, 269 TokenType.DICTIONARY, 270 } 271 272 CREATABLES = { 273 TokenType.COLUMN, 274 TokenType.CONSTRAINT, 275 TokenType.FUNCTION, 276 TokenType.INDEX, 277 TokenType.PROCEDURE, 278 TokenType.FOREIGN_KEY, 279 *DB_CREATABLES, 280 } 281 282 # Tokens that can represent identifiers 283 ID_VAR_TOKENS = { 284 TokenType.VAR, 285 TokenType.ANTI, 286 TokenType.APPLY, 287 TokenType.ASC, 288 TokenType.AUTO_INCREMENT, 289 TokenType.BEGIN, 290 TokenType.CACHE, 291 TokenType.CASE, 292 TokenType.COLLATE, 293 TokenType.COMMAND, 294 TokenType.COMMENT, 295 TokenType.COMMIT, 296 TokenType.CONSTRAINT, 297 TokenType.DEFAULT, 298 TokenType.DELETE, 299 TokenType.DESC, 300 TokenType.DESCRIBE, 301 TokenType.DICTIONARY, 302 TokenType.DIV, 303 TokenType.END, 304 TokenType.EXECUTE, 305 TokenType.ESCAPE, 306 TokenType.FALSE, 307 TokenType.FIRST, 308 TokenType.FILTER, 309 TokenType.FORMAT, 310 TokenType.FULL, 311 TokenType.IS, 312 TokenType.ISNULL, 313 TokenType.INTERVAL, 314 TokenType.KEEP, 315 TokenType.KILL, 316 TokenType.LEFT, 317 TokenType.LOAD, 318 TokenType.MERGE, 319 TokenType.NATURAL, 320 TokenType.NEXT, 321 TokenType.OFFSET, 322 TokenType.OPERATOR, 323 TokenType.ORDINALITY, 324 TokenType.OVERLAPS, 325 TokenType.OVERWRITE, 326 TokenType.PARTITION, 327 TokenType.PERCENT, 328 TokenType.PIVOT, 329 TokenType.PRAGMA, 330 TokenType.RANGE, 331 TokenType.RECURSIVE, 332 TokenType.REFERENCES, 333 TokenType.REFRESH, 334 TokenType.REPLACE, 335 TokenType.RIGHT, 336 TokenType.ROW, 337 TokenType.ROWS, 338 TokenType.SEMI, 339 TokenType.SET, 340 TokenType.SETTINGS, 341 TokenType.SHOW, 342 TokenType.TEMPORARY, 343 TokenType.TOP, 344 TokenType.TRUE, 345 TokenType.UNIQUE, 346 TokenType.UNPIVOT, 347 TokenType.UPDATE, 348 TokenType.USE, 349 TokenType.VOLATILE, 350 TokenType.WINDOW, 351 *CREATABLES, 352 *SUBQUERY_PREDICATES, 353 *TYPE_TOKENS, 354 *NO_PAREN_FUNCTIONS, 355 } 356 357 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 358 359 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 
360 TokenType.ANTI, 361 TokenType.APPLY, 362 TokenType.ASOF, 363 TokenType.FULL, 364 TokenType.LEFT, 365 TokenType.LOCK, 366 TokenType.NATURAL, 367 TokenType.OFFSET, 368 TokenType.RIGHT, 369 TokenType.SEMI, 370 TokenType.WINDOW, 371 } 372 373 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 374 375 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 376 377 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 378 379 FUNC_TOKENS = { 380 TokenType.COLLATE, 381 TokenType.COMMAND, 382 TokenType.CURRENT_DATE, 383 TokenType.CURRENT_DATETIME, 384 TokenType.CURRENT_TIMESTAMP, 385 TokenType.CURRENT_TIME, 386 TokenType.CURRENT_USER, 387 TokenType.FILTER, 388 TokenType.FIRST, 389 TokenType.FORMAT, 390 TokenType.GLOB, 391 TokenType.IDENTIFIER, 392 TokenType.INDEX, 393 TokenType.ISNULL, 394 TokenType.ILIKE, 395 TokenType.INSERT, 396 TokenType.LIKE, 397 TokenType.MERGE, 398 TokenType.OFFSET, 399 TokenType.PRIMARY_KEY, 400 TokenType.RANGE, 401 TokenType.REPLACE, 402 TokenType.RLIKE, 403 TokenType.ROW, 404 TokenType.UNNEST, 405 TokenType.VAR, 406 TokenType.LEFT, 407 TokenType.RIGHT, 408 TokenType.DATE, 409 TokenType.DATETIME, 410 TokenType.TABLE, 411 TokenType.TIMESTAMP, 412 TokenType.TIMESTAMPTZ, 413 TokenType.WINDOW, 414 TokenType.XOR, 415 *TYPE_TOKENS, 416 *SUBQUERY_PREDICATES, 417 } 418 419 CONJUNCTION = { 420 TokenType.AND: exp.And, 421 TokenType.OR: exp.Or, 422 } 423 424 EQUALITY = { 425 TokenType.COLON_EQ: exp.PropertyEQ, 426 TokenType.EQ: exp.EQ, 427 TokenType.NEQ: exp.NEQ, 428 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 429 } 430 431 COMPARISON = { 432 TokenType.GT: exp.GT, 433 TokenType.GTE: exp.GTE, 434 TokenType.LT: exp.LT, 435 TokenType.LTE: exp.LTE, 436 } 437 438 BITWISE = { 439 TokenType.AMP: exp.BitwiseAnd, 440 TokenType.CARET: exp.BitwiseXor, 441 TokenType.PIPE: exp.BitwiseOr, 442 } 443 444 TERM = { 445 TokenType.DASH: exp.Sub, 446 TokenType.PLUS: exp.Add, 447 TokenType.MOD: exp.Mod, 448 TokenType.COLLATE: exp.Collate, 449 } 450 451 FACTOR = { 452 TokenType.DIV: exp.IntDiv, 453 TokenType.LR_ARROW: exp.Distance, 454 TokenType.SLASH: exp.Div, 455 TokenType.STAR: exp.Mul, 456 } 457 458 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 459 460 TIMES = { 461 TokenType.TIME, 462 TokenType.TIMETZ, 463 } 464 465 TIMESTAMPS = { 466 TokenType.TIMESTAMP, 467 TokenType.TIMESTAMPTZ, 468 TokenType.TIMESTAMPLTZ, 469 *TIMES, 470 } 471 472 SET_OPERATIONS = { 473 TokenType.UNION, 474 TokenType.INTERSECT, 475 TokenType.EXCEPT, 476 } 477 478 JOIN_METHODS = { 479 TokenType.NATURAL, 480 TokenType.ASOF, 481 } 482 483 JOIN_SIDES = { 484 TokenType.LEFT, 485 TokenType.RIGHT, 486 TokenType.FULL, 487 } 488 489 JOIN_KINDS = { 490 TokenType.INNER, 491 TokenType.OUTER, 492 TokenType.CROSS, 493 TokenType.SEMI, 494 TokenType.ANTI, 495 } 496 497 JOIN_HINTS: t.Set[str] = set() 498 499 LAMBDAS = { 500 TokenType.ARROW: lambda self, expressions: self.expression( 501 exp.Lambda, 502 this=self._replace_lambda( 503 self._parse_conjunction(), 504 {node.name for node in expressions}, 505 ), 506 expressions=expressions, 507 ), 508 TokenType.FARROW: lambda self, expressions: self.expression( 509 exp.Kwarg, 510 this=exp.var(expressions[0].name), 511 expression=self._parse_conjunction(), 512 ), 513 } 514 515 COLUMN_OPERATORS = { 516 TokenType.DOT: None, 517 TokenType.DCOLON: lambda self, this, to: self.expression( 518 exp.Cast if self.STRICT_CAST else exp.TryCast, 519 this=this, 520 to=to, 521 ), 522 TokenType.ARROW: lambda self, this, path: self.expression( 523 exp.JSONExtract, 524 this=this, 525 expression=path, 
526 ), 527 TokenType.DARROW: lambda self, this, path: self.expression( 528 exp.JSONExtractScalar, 529 this=this, 530 expression=path, 531 ), 532 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 533 exp.JSONBExtract, 534 this=this, 535 expression=path, 536 ), 537 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 538 exp.JSONBExtractScalar, 539 this=this, 540 expression=path, 541 ), 542 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 543 exp.JSONBContains, 544 this=this, 545 expression=key, 546 ), 547 } 548 549 EXPRESSION_PARSERS = { 550 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 551 exp.Column: lambda self: self._parse_column(), 552 exp.Condition: lambda self: self._parse_conjunction(), 553 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 554 exp.Expression: lambda self: self._parse_statement(), 555 exp.From: lambda self: self._parse_from(), 556 exp.Group: lambda self: self._parse_group(), 557 exp.Having: lambda self: self._parse_having(), 558 exp.Identifier: lambda self: self._parse_id_var(), 559 exp.Join: lambda self: self._parse_join(), 560 exp.Lambda: lambda self: self._parse_lambda(), 561 exp.Lateral: lambda self: self._parse_lateral(), 562 exp.Limit: lambda self: self._parse_limit(), 563 exp.Offset: lambda self: self._parse_offset(), 564 exp.Order: lambda self: self._parse_order(), 565 exp.Ordered: lambda self: self._parse_ordered(), 566 exp.Properties: lambda self: self._parse_properties(), 567 exp.Qualify: lambda self: self._parse_qualify(), 568 exp.Returning: lambda self: self._parse_returning(), 569 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 570 exp.Table: lambda self: self._parse_table_parts(), 571 exp.TableAlias: lambda self: self._parse_table_alias(), 572 exp.Where: lambda self: self._parse_where(), 573 exp.Window: lambda self: self._parse_named_window(), 574 exp.With: lambda self: self._parse_with(), 575 "JOIN_TYPE": lambda self: self._parse_join_parts(), 576 } 577 578 STATEMENT_PARSERS = { 579 TokenType.ALTER: lambda self: self._parse_alter(), 580 TokenType.BEGIN: lambda self: self._parse_transaction(), 581 TokenType.CACHE: lambda self: self._parse_cache(), 582 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 583 TokenType.COMMENT: lambda self: self._parse_comment(), 584 TokenType.CREATE: lambda self: self._parse_create(), 585 TokenType.DELETE: lambda self: self._parse_delete(), 586 TokenType.DESC: lambda self: self._parse_describe(), 587 TokenType.DESCRIBE: lambda self: self._parse_describe(), 588 TokenType.DROP: lambda self: self._parse_drop(), 589 TokenType.INSERT: lambda self: self._parse_insert(), 590 TokenType.KILL: lambda self: self._parse_kill(), 591 TokenType.LOAD: lambda self: self._parse_load(), 592 TokenType.MERGE: lambda self: self._parse_merge(), 593 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 594 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 595 TokenType.REFRESH: lambda self: self._parse_refresh(), 596 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 597 TokenType.SET: lambda self: self._parse_set(), 598 TokenType.UNCACHE: lambda self: self._parse_uncache(), 599 TokenType.UPDATE: lambda self: self._parse_update(), 600 TokenType.USE: lambda self: self.expression( 601 exp.Use, 602 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 603 and exp.var(self._prev.text), 604 this=self._parse_table(schema=False), 605 ), 606 } 607 
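STATEMENT_PARSERS is the top-level dispatch table: _parse_statement (further down in this listing) matches the leading token against it, falls back to Tokenizer.COMMANDS for opaque commands, and otherwise attempts expression or SELECT parsing. Because it is a plain class attribute, dialect subclasses add or override statements by extending the dict. A quick look at the dispatch through the public helper, with the node types expected under the default dialect noted as comments:

    import sqlglot

    # USE, ALTER and SET each have an entry in STATEMENT_PARSERS, so each
    # leading token routes to a dedicated parser and expression type.
    for sql in ("USE my_db", "ALTER TABLE t ADD COLUMN c INT", "SET x = 1"):
        print(sql, "->", type(sqlglot.parse_one(sql)).__name__)
    # -> Use, AlterTable, Set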
608 UNARY_PARSERS = { 609 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 610 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 611 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 612 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 613 } 614 615 PRIMARY_PARSERS = { 616 TokenType.STRING: lambda self, token: self.expression( 617 exp.Literal, this=token.text, is_string=True 618 ), 619 TokenType.NUMBER: lambda self, token: self.expression( 620 exp.Literal, this=token.text, is_string=False 621 ), 622 TokenType.STAR: lambda self, _: self.expression( 623 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 624 ), 625 TokenType.NULL: lambda self, _: self.expression(exp.Null), 626 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 627 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 628 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 629 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 630 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 631 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 632 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 633 exp.National, this=token.text 634 ), 635 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 636 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 637 exp.RawString, this=token.text 638 ), 639 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 640 } 641 642 PLACEHOLDER_PARSERS = { 643 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 644 TokenType.PARAMETER: lambda self: self._parse_parameter(), 645 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 646 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 647 else None, 648 } 649 650 RANGE_PARSERS = { 651 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 652 TokenType.GLOB: binary_range_parser(exp.Glob), 653 TokenType.ILIKE: binary_range_parser(exp.ILike), 654 TokenType.IN: lambda self, this: self._parse_in(this), 655 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 656 TokenType.IS: lambda self, this: self._parse_is(this), 657 TokenType.LIKE: binary_range_parser(exp.Like), 658 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 659 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 660 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 661 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 662 } 663 664 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 665 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 666 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 667 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 668 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 669 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 670 "CHECKSUM": lambda self: self._parse_checksum(), 671 "CLUSTER BY": lambda self: self._parse_cluster(), 672 "CLUSTERED": lambda self: self._parse_clustered_by(), 673 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 674 exp.CollateProperty, **kwargs 675 ), 676 
"COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 677 "COPY": lambda self: self._parse_copy_property(), 678 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 679 "DEFINER": lambda self: self._parse_definer(), 680 "DETERMINISTIC": lambda self: self.expression( 681 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 682 ), 683 "DISTKEY": lambda self: self._parse_distkey(), 684 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 685 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 686 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 687 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 688 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 689 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 690 "FREESPACE": lambda self: self._parse_freespace(), 691 "HEAP": lambda self: self.expression(exp.HeapProperty), 692 "IMMUTABLE": lambda self: self.expression( 693 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 694 ), 695 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 696 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 697 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 698 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 699 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 700 "LIKE": lambda self: self._parse_create_like(), 701 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 702 "LOCK": lambda self: self._parse_locking(), 703 "LOCKING": lambda self: self._parse_locking(), 704 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 705 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 706 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 707 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 708 "NO": lambda self: self._parse_no_property(), 709 "ON": lambda self: self._parse_on_property(), 710 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 711 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 712 "PARTITION": lambda self: self._parse_partitioned_of(), 713 "PARTITION BY": lambda self: self._parse_partitioned_by(), 714 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 715 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 716 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 717 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 718 "REMOTE": lambda self: self._parse_remote_with_connection(), 719 "RETURNS": lambda self: self._parse_returns(), 720 "ROW": lambda self: self._parse_row(), 721 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 722 "SAMPLE": lambda self: self.expression( 723 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 724 ), 725 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 726 "SETTINGS": lambda self: self.expression( 727 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 728 ), 729 "SORTKEY": lambda self: self._parse_sortkey(), 730 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 731 "STABLE": lambda self: self.expression( 732 exp.StabilityProperty, this=exp.Literal.string("STABLE") 733 ), 734 "STORED": lambda 
self: self._parse_stored(), 735 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 736 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 737 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 738 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 739 "TO": lambda self: self._parse_to_table(), 740 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 741 "TRANSFORM": lambda self: self.expression( 742 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 743 ), 744 "TTL": lambda self: self._parse_ttl(), 745 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 746 "VOLATILE": lambda self: self._parse_volatile_property(), 747 "WITH": lambda self: self._parse_with_property(), 748 } 749 750 CONSTRAINT_PARSERS = { 751 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 752 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 753 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 754 "CHARACTER SET": lambda self: self.expression( 755 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 756 ), 757 "CHECK": lambda self: self.expression( 758 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 759 ), 760 "COLLATE": lambda self: self.expression( 761 exp.CollateColumnConstraint, this=self._parse_var() 762 ), 763 "COMMENT": lambda self: self.expression( 764 exp.CommentColumnConstraint, this=self._parse_string() 765 ), 766 "COMPRESS": lambda self: self._parse_compress(), 767 "CLUSTERED": lambda self: self.expression( 768 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 769 ), 770 "NONCLUSTERED": lambda self: self.expression( 771 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 772 ), 773 "DEFAULT": lambda self: self.expression( 774 exp.DefaultColumnConstraint, this=self._parse_bitwise() 775 ), 776 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 777 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 778 "FORMAT": lambda self: self.expression( 779 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 780 ), 781 "GENERATED": lambda self: self._parse_generated_as_identity(), 782 "IDENTITY": lambda self: self._parse_auto_increment(), 783 "INLINE": lambda self: self._parse_inline(), 784 "LIKE": lambda self: self._parse_create_like(), 785 "NOT": lambda self: self._parse_not_constraint(), 786 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 787 "ON": lambda self: ( 788 self._match(TokenType.UPDATE) 789 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 790 ) 791 or self.expression(exp.OnProperty, this=self._parse_id_var()), 792 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 793 "PERIOD": lambda self: self._parse_period_for_system_time(), 794 "PRIMARY KEY": lambda self: self._parse_primary_key(), 795 "REFERENCES": lambda self: self._parse_references(match=False), 796 "TITLE": lambda self: self.expression( 797 exp.TitleColumnConstraint, this=self._parse_var_or_string() 798 ), 799 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 800 "UNIQUE": lambda self: self._parse_unique(), 801 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 802 "WITH": lambda self: self.expression( 803 
exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 804 ), 805 } 806 807 ALTER_PARSERS = { 808 "ADD": lambda self: self._parse_alter_table_add(), 809 "ALTER": lambda self: self._parse_alter_table_alter(), 810 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 811 "DROP": lambda self: self._parse_alter_table_drop(), 812 "RENAME": lambda self: self._parse_alter_table_rename(), 813 } 814 815 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 816 817 NO_PAREN_FUNCTION_PARSERS = { 818 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 819 "CASE": lambda self: self._parse_case(), 820 "IF": lambda self: self._parse_if(), 821 "NEXT": lambda self: self._parse_next_value_for(), 822 } 823 824 INVALID_FUNC_NAME_TOKENS = { 825 TokenType.IDENTIFIER, 826 TokenType.STRING, 827 } 828 829 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 830 831 FUNCTION_PARSERS = { 832 "ANY_VALUE": lambda self: self._parse_any_value(), 833 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 834 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 835 "DECODE": lambda self: self._parse_decode(), 836 "EXTRACT": lambda self: self._parse_extract(), 837 "JSON_OBJECT": lambda self: self._parse_json_object(), 838 "JSON_TABLE": lambda self: self._parse_json_table(), 839 "MATCH": lambda self: self._parse_match_against(), 840 "OPENJSON": lambda self: self._parse_open_json(), 841 "POSITION": lambda self: self._parse_position(), 842 "PREDICT": lambda self: self._parse_predict(), 843 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 844 "STRING_AGG": lambda self: self._parse_string_agg(), 845 "SUBSTRING": lambda self: self._parse_substring(), 846 "TRIM": lambda self: self._parse_trim(), 847 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 848 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 849 } 850 851 QUERY_MODIFIER_PARSERS = { 852 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 853 TokenType.WHERE: lambda self: ("where", self._parse_where()), 854 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 855 TokenType.HAVING: lambda self: ("having", self._parse_having()), 856 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 857 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 858 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 859 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 860 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 861 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 862 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 863 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 864 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 865 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 866 TokenType.CLUSTER_BY: lambda self: ( 867 "cluster", 868 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 869 ), 870 TokenType.DISTRIBUTE_BY: lambda self: ( 871 "distribute", 872 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 873 ), 874 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 875 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 876 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 877 } 878 879 SET_PARSERS = { 
880 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 881 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 882 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 883 "TRANSACTION": lambda self: self._parse_set_transaction(), 884 } 885 886 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 887 888 TYPE_LITERAL_PARSERS = { 889 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 890 } 891 892 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 893 894 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 895 896 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 897 898 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 899 TRANSACTION_CHARACTERISTICS = { 900 "ISOLATION LEVEL REPEATABLE READ", 901 "ISOLATION LEVEL READ COMMITTED", 902 "ISOLATION LEVEL READ UNCOMMITTED", 903 "ISOLATION LEVEL SERIALIZABLE", 904 "READ WRITE", 905 "READ ONLY", 906 } 907 908 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 909 910 CLONE_KEYWORDS = {"CLONE", "COPY"} 911 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 912 913 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 914 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 915 916 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 917 918 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 919 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 920 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 921 922 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 923 924 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 925 926 DISTINCT_TOKENS = {TokenType.DISTINCT} 927 928 NULL_TOKENS = {TokenType.NULL} 929 930 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 931 932 STRICT_CAST = True 933 934 PREFIXED_PIVOT_COLUMNS = False 935 IDENTIFY_PIVOT_STRINGS = False 936 937 LOG_DEFAULTS_TO_LN = False 938 939 # Whether or not ADD is present for each column added by ALTER TABLE 940 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 941 942 # Whether or not the table sample clause expects CSV syntax 943 TABLESAMPLE_CSV = False 944 945 # Whether or not the SET command needs a delimiter (e.g. 
"=") for assignments 946 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 947 948 # Whether the TRIM function expects the characters to trim as its first argument 949 TRIM_PATTERN_FIRST = False 950 951 __slots__ = ( 952 "error_level", 953 "error_message_context", 954 "max_errors", 955 "dialect", 956 "sql", 957 "errors", 958 "_tokens", 959 "_index", 960 "_curr", 961 "_next", 962 "_prev", 963 "_prev_comments", 964 ) 965 966 # Autofilled 967 SHOW_TRIE: t.Dict = {} 968 SET_TRIE: t.Dict = {} 969 970 def __init__( 971 self, 972 error_level: t.Optional[ErrorLevel] = None, 973 error_message_context: int = 100, 974 max_errors: int = 3, 975 dialect: DialectType = None, 976 ): 977 from sqlglot.dialects import Dialect 978 979 self.error_level = error_level or ErrorLevel.IMMEDIATE 980 self.error_message_context = error_message_context 981 self.max_errors = max_errors 982 self.dialect = Dialect.get_or_raise(dialect) 983 self.reset() 984 985 def reset(self): 986 self.sql = "" 987 self.errors = [] 988 self._tokens = [] 989 self._index = 0 990 self._curr = None 991 self._next = None 992 self._prev = None 993 self._prev_comments = None 994 995 def parse( 996 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 997 ) -> t.List[t.Optional[exp.Expression]]: 998 """ 999 Parses a list of tokens and returns a list of syntax trees, one tree 1000 per parsed SQL statement. 1001 1002 Args: 1003 raw_tokens: The list of tokens. 1004 sql: The original SQL string, used to produce helpful debug messages. 1005 1006 Returns: 1007 The list of the produced syntax trees. 1008 """ 1009 return self._parse( 1010 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1011 ) 1012 1013 def parse_into( 1014 self, 1015 expression_types: exp.IntoType, 1016 raw_tokens: t.List[Token], 1017 sql: t.Optional[str] = None, 1018 ) -> t.List[t.Optional[exp.Expression]]: 1019 """ 1020 Parses a list of tokens into a given Expression type. If a collection of Expression 1021 types is given instead, this method will try to parse the token list into each one 1022 of them, stopping at the first for which the parsing succeeds. 1023 1024 Args: 1025 expression_types: The expression type(s) to try and parse the token list into. 1026 raw_tokens: The list of tokens. 1027 sql: The original SQL string, used to produce helpful debug messages. 1028 1029 Returns: 1030 The target Expression. 
1031 """ 1032 errors = [] 1033 for expression_type in ensure_list(expression_types): 1034 parser = self.EXPRESSION_PARSERS.get(expression_type) 1035 if not parser: 1036 raise TypeError(f"No parser registered for {expression_type}") 1037 1038 try: 1039 return self._parse(parser, raw_tokens, sql) 1040 except ParseError as e: 1041 e.errors[0]["into_expression"] = expression_type 1042 errors.append(e) 1043 1044 raise ParseError( 1045 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1046 errors=merge_errors(errors), 1047 ) from errors[-1] 1048 1049 def _parse( 1050 self, 1051 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1052 raw_tokens: t.List[Token], 1053 sql: t.Optional[str] = None, 1054 ) -> t.List[t.Optional[exp.Expression]]: 1055 self.reset() 1056 self.sql = sql or "" 1057 1058 total = len(raw_tokens) 1059 chunks: t.List[t.List[Token]] = [[]] 1060 1061 for i, token in enumerate(raw_tokens): 1062 if token.token_type == TokenType.SEMICOLON: 1063 if i < total - 1: 1064 chunks.append([]) 1065 else: 1066 chunks[-1].append(token) 1067 1068 expressions = [] 1069 1070 for tokens in chunks: 1071 self._index = -1 1072 self._tokens = tokens 1073 self._advance() 1074 1075 expressions.append(parse_method(self)) 1076 1077 if self._index < len(self._tokens): 1078 self.raise_error("Invalid expression / Unexpected token") 1079 1080 self.check_errors() 1081 1082 return expressions 1083 1084 def check_errors(self) -> None: 1085 """Logs or raises any found errors, depending on the chosen error level setting.""" 1086 if self.error_level == ErrorLevel.WARN: 1087 for error in self.errors: 1088 logger.error(str(error)) 1089 elif self.error_level == ErrorLevel.RAISE and self.errors: 1090 raise ParseError( 1091 concat_messages(self.errors, self.max_errors), 1092 errors=merge_errors(self.errors), 1093 ) 1094 1095 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1096 """ 1097 Appends an error in the list of recorded errors or raises it, depending on the chosen 1098 error level setting. 1099 """ 1100 token = token or self._curr or self._prev or Token.string("") 1101 start = token.start 1102 end = token.end + 1 1103 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1104 highlight = self.sql[start:end] 1105 end_context = self.sql[end : end + self.error_message_context] 1106 1107 error = ParseError.new( 1108 f"{message}. Line {token.line}, Col: {token.col}.\n" 1109 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1110 description=message, 1111 line=token.line, 1112 col=token.col, 1113 start_context=start_context, 1114 highlight=highlight, 1115 end_context=end_context, 1116 ) 1117 1118 if self.error_level == ErrorLevel.IMMEDIATE: 1119 raise error 1120 1121 self.errors.append(error) 1122 1123 def expression( 1124 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1125 ) -> E: 1126 """ 1127 Creates a new, validated Expression. 1128 1129 Args: 1130 exp_class: The expression class to instantiate. 1131 comments: An optional list of comments to attach to the expression. 1132 kwargs: The arguments to set for the expression along with their respective values. 1133 1134 Returns: 1135 The target expression. 
1136 """ 1137 instance = exp_class(**kwargs) 1138 instance.add_comments(comments) if comments else self._add_comments(instance) 1139 return self.validate_expression(instance) 1140 1141 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1142 if expression and self._prev_comments: 1143 expression.add_comments(self._prev_comments) 1144 self._prev_comments = None 1145 1146 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1147 """ 1148 Validates an Expression, making sure that all its mandatory arguments are set. 1149 1150 Args: 1151 expression: The expression to validate. 1152 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1153 1154 Returns: 1155 The validated expression. 1156 """ 1157 if self.error_level != ErrorLevel.IGNORE: 1158 for error_message in expression.error_messages(args): 1159 self.raise_error(error_message) 1160 1161 return expression 1162 1163 def _find_sql(self, start: Token, end: Token) -> str: 1164 return self.sql[start.start : end.end + 1] 1165 1166 def _advance(self, times: int = 1) -> None: 1167 self._index += times 1168 self._curr = seq_get(self._tokens, self._index) 1169 self._next = seq_get(self._tokens, self._index + 1) 1170 1171 if self._index > 0: 1172 self._prev = self._tokens[self._index - 1] 1173 self._prev_comments = self._prev.comments 1174 else: 1175 self._prev = None 1176 self._prev_comments = None 1177 1178 def _retreat(self, index: int) -> None: 1179 if index != self._index: 1180 self._advance(index - self._index) 1181 1182 def _parse_command(self) -> exp.Command: 1183 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1184 1185 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1186 start = self._prev 1187 exists = self._parse_exists() if allow_exists else None 1188 1189 self._match(TokenType.ON) 1190 1191 kind = self._match_set(self.CREATABLES) and self._prev 1192 if not kind: 1193 return self._parse_as_command(start) 1194 1195 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1196 this = self._parse_user_defined_function(kind=kind.token_type) 1197 elif kind.token_type == TokenType.TABLE: 1198 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1199 elif kind.token_type == TokenType.COLUMN: 1200 this = self._parse_column() 1201 else: 1202 this = self._parse_id_var() 1203 1204 self._match(TokenType.IS) 1205 1206 return self.expression( 1207 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1208 ) 1209 1210 def _parse_to_table( 1211 self, 1212 ) -> exp.ToTableProperty: 1213 table = self._parse_table_parts(schema=True) 1214 return self.expression(exp.ToTableProperty, this=table) 1215 1216 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1217 def _parse_ttl(self) -> exp.Expression: 1218 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1219 this = self._parse_bitwise() 1220 1221 if self._match_text_seq("DELETE"): 1222 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1223 if self._match_text_seq("RECOMPRESS"): 1224 return self.expression( 1225 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1226 ) 1227 if self._match_text_seq("TO", "DISK"): 1228 return self.expression( 1229 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1230 ) 1231 if self._match_text_seq("TO", "VOLUME"): 1232 return self.expression( 1233 
exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1234 ) 1235 1236 return this 1237 1238 expressions = self._parse_csv(_parse_ttl_action) 1239 where = self._parse_where() 1240 group = self._parse_group() 1241 1242 aggregates = None 1243 if group and self._match(TokenType.SET): 1244 aggregates = self._parse_csv(self._parse_set_item) 1245 1246 return self.expression( 1247 exp.MergeTreeTTL, 1248 expressions=expressions, 1249 where=where, 1250 group=group, 1251 aggregates=aggregates, 1252 ) 1253 1254 def _parse_statement(self) -> t.Optional[exp.Expression]: 1255 if self._curr is None: 1256 return None 1257 1258 if self._match_set(self.STATEMENT_PARSERS): 1259 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1260 1261 if self._match_set(Tokenizer.COMMANDS): 1262 return self._parse_command() 1263 1264 expression = self._parse_expression() 1265 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1266 return self._parse_query_modifiers(expression) 1267 1268 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1269 start = self._prev 1270 temporary = self._match(TokenType.TEMPORARY) 1271 materialized = self._match_text_seq("MATERIALIZED") 1272 1273 kind = self._match_set(self.CREATABLES) and self._prev.text 1274 if not kind: 1275 return self._parse_as_command(start) 1276 1277 return self.expression( 1278 exp.Drop, 1279 comments=start.comments, 1280 exists=exists or self._parse_exists(), 1281 this=self._parse_table(schema=True), 1282 kind=kind, 1283 temporary=temporary, 1284 materialized=materialized, 1285 cascade=self._match_text_seq("CASCADE"), 1286 constraints=self._match_text_seq("CONSTRAINTS"), 1287 purge=self._match_text_seq("PURGE"), 1288 ) 1289 1290 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1291 return ( 1292 self._match_text_seq("IF") 1293 and (not not_ or self._match(TokenType.NOT)) 1294 and self._match(TokenType.EXISTS) 1295 ) 1296 1297 def _parse_create(self) -> exp.Create | exp.Command: 1298 # Note: this can't be None because we've matched a statement parser 1299 start = self._prev 1300 comments = self._prev_comments 1301 1302 replace = start.text.upper() == "REPLACE" or self._match_pair( 1303 TokenType.OR, TokenType.REPLACE 1304 ) 1305 unique = self._match(TokenType.UNIQUE) 1306 1307 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1308 self._advance() 1309 1310 properties = None 1311 create_token = self._match_set(self.CREATABLES) and self._prev 1312 1313 if not create_token: 1314 # exp.Properties.Location.POST_CREATE 1315 properties = self._parse_properties() 1316 create_token = self._match_set(self.CREATABLES) and self._prev 1317 1318 if not properties or not create_token: 1319 return self._parse_as_command(start) 1320 1321 exists = self._parse_exists(not_=True) 1322 this = None 1323 expression: t.Optional[exp.Expression] = None 1324 indexes = None 1325 no_schema_binding = None 1326 begin = None 1327 end = None 1328 clone = None 1329 1330 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1331 nonlocal properties 1332 if properties and temp_props: 1333 properties.expressions.extend(temp_props.expressions) 1334 elif temp_props: 1335 properties = temp_props 1336 1337 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1338 this = self._parse_user_defined_function(kind=create_token.token_type) 1339 1340 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1341 
extend_props(self._parse_properties()) 1342 1343 self._match(TokenType.ALIAS) 1344 1345 if self._match(TokenType.COMMAND): 1346 expression = self._parse_as_command(self._prev) 1347 else: 1348 begin = self._match(TokenType.BEGIN) 1349 return_ = self._match_text_seq("RETURN") 1350 1351 if self._match(TokenType.STRING, advance=False): 1352 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1353 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1354 expression = self._parse_string() 1355 extend_props(self._parse_properties()) 1356 else: 1357 expression = self._parse_statement() 1358 1359 end = self._match_text_seq("END") 1360 1361 if return_: 1362 expression = self.expression(exp.Return, this=expression) 1363 elif create_token.token_type == TokenType.INDEX: 1364 this = self._parse_index(index=self._parse_id_var()) 1365 elif create_token.token_type in self.DB_CREATABLES: 1366 table_parts = self._parse_table_parts(schema=True) 1367 1368 # exp.Properties.Location.POST_NAME 1369 self._match(TokenType.COMMA) 1370 extend_props(self._parse_properties(before=True)) 1371 1372 this = self._parse_schema(this=table_parts) 1373 1374 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1375 extend_props(self._parse_properties()) 1376 1377 self._match(TokenType.ALIAS) 1378 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1379 # exp.Properties.Location.POST_ALIAS 1380 extend_props(self._parse_properties()) 1381 1382 expression = self._parse_ddl_select() 1383 1384 if create_token.token_type == TokenType.TABLE: 1385 # exp.Properties.Location.POST_EXPRESSION 1386 extend_props(self._parse_properties()) 1387 1388 indexes = [] 1389 while True: 1390 index = self._parse_index() 1391 1392 # exp.Properties.Location.POST_INDEX 1393 extend_props(self._parse_properties()) 1394 1395 if not index: 1396 break 1397 else: 1398 self._match(TokenType.COMMA) 1399 indexes.append(index) 1400 elif create_token.token_type == TokenType.VIEW: 1401 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1402 no_schema_binding = True 1403 1404 shallow = self._match_text_seq("SHALLOW") 1405 1406 if self._match_texts(self.CLONE_KEYWORDS): 1407 copy = self._prev.text.lower() == "copy" 1408 clone = self._parse_table(schema=True) 1409 when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper() 1410 clone_kind = ( 1411 self._match(TokenType.L_PAREN) 1412 and self._match_texts(self.CLONE_KINDS) 1413 and self._prev.text.upper() 1414 ) 1415 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1416 self._match(TokenType.R_PAREN) 1417 clone = self.expression( 1418 exp.Clone, 1419 this=clone, 1420 when=when, 1421 kind=clone_kind, 1422 shallow=shallow, 1423 expression=clone_expression, 1424 copy=copy, 1425 ) 1426 1427 return self.expression( 1428 exp.Create, 1429 comments=comments, 1430 this=this, 1431 kind=create_token.text, 1432 replace=replace, 1433 unique=unique, 1434 expression=expression, 1435 exists=exists, 1436 properties=properties, 1437 indexes=indexes, 1438 no_schema_binding=no_schema_binding, 1439 begin=begin, 1440 end=end, 1441 clone=clone, 1442 ) 1443 1444 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1445 # only used for teradata currently 1446 self._match(TokenType.COMMA) 1447 1448 kwargs = { 1449 "no": self._match_text_seq("NO"), 1450 "dual": self._match_text_seq("DUAL"), 1451 "before": self._match_text_seq("BEFORE"), 1452 "default": self._match_text_seq("DEFAULT"), 
1453 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1454 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1455 "after": self._match_text_seq("AFTER"), 1456 "minimum": self._match_texts(("MIN", "MINIMUM")), 1457 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1458 } 1459 1460 if self._match_texts(self.PROPERTY_PARSERS): 1461 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1462 try: 1463 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1464 except TypeError: 1465 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1466 1467 return None 1468 1469 def _parse_property(self) -> t.Optional[exp.Expression]: 1470 if self._match_texts(self.PROPERTY_PARSERS): 1471 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1472 1473 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1474 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1475 1476 if self._match_text_seq("COMPOUND", "SORTKEY"): 1477 return self._parse_sortkey(compound=True) 1478 1479 if self._match_text_seq("SQL", "SECURITY"): 1480 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1481 1482 index = self._index 1483 key = self._parse_column() 1484 1485 if not self._match(TokenType.EQ): 1486 self._retreat(index) 1487 return None 1488 1489 return self.expression( 1490 exp.Property, 1491 this=key.to_dot() if isinstance(key, exp.Column) else key, 1492 value=self._parse_column() or self._parse_var(any_token=True), 1493 ) 1494 1495 def _parse_stored(self) -> exp.FileFormatProperty: 1496 self._match(TokenType.ALIAS) 1497 1498 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1499 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1500 1501 return self.expression( 1502 exp.FileFormatProperty, 1503 this=self.expression( 1504 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1505 ) 1506 if input_format or output_format 1507 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1508 ) 1509 1510 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1511 self._match(TokenType.EQ) 1512 self._match(TokenType.ALIAS) 1513 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1514 1515 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1516 properties = [] 1517 while True: 1518 if before: 1519 prop = self._parse_property_before() 1520 else: 1521 prop = self._parse_property() 1522 1523 if not prop: 1524 break 1525 for p in ensure_list(prop): 1526 properties.append(p) 1527 1528 if properties: 1529 return self.expression(exp.Properties, expressions=properties) 1530 1531 return None 1532 1533 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1534 return self.expression( 1535 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1536 ) 1537 1538 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1539 if self._index >= 2: 1540 pre_volatile_token = self._tokens[self._index - 2] 1541 else: 1542 pre_volatile_token = None 1543 1544 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1545 return exp.VolatileProperty() 1546 1547 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1548 1549 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1550 
    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

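    # Editor's note -- an illustrative sketch, not part of the original source:
    # _parse_partition_by returns a plain list that callers (e.g. window and
    # MATCH_RECOGNIZE parsing) attach themselves. Assuming the public API:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   select = sqlglot.parse_one("SELECT SUM(x) OVER (PARTITION BY y) FROM t")
    #   window = select.find(exp.Window)
    #   assert window is not None and window.args.get("partition_by")
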
    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

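    # Editor's note -- an illustrative sketch, not part of the original source:
    # _parse_insert builds the whole statement in one expression() call, so most
    # optional clauses surface as args on exp.Insert:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   insert = sqlglot.parse_one("INSERT INTO t (a) SELECT a FROM s")
    #   assert isinstance(insert, exp.Insert)
    #   assert isinstance(insert.expression, exp.Select)
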
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

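    # Editor's note -- an illustrative sketch, not part of the original source: the
    # two _parse_row_format branches map Hive's ROW FORMAT SERDE and ROW FORMAT
    # DELIMITED clauses onto distinct property nodes, e.g.:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ddl = sqlglot.parse_one(
    #       "CREATE TABLE t (c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','",
    #       read="hive",
    #   )
    #   assert ddl.find(exp.RowFormatDelimitedProperty) is not None
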
    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

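    # Editor's note -- an illustrative sketch, not part of the original source:
    # because _parse_delete only collects leading tables when FROM isn't the next
    # token, MySQL's multiple-table form and the standard form both land in
    # exp.Delete:
    #
    #   import sqlglot
    #
    #   simple = sqlglot.parse_one("DELETE FROM t WHERE x = 1")
    #   multi = sqlglot.parse_one(
    #       "DELETE t1, t2 FROM t1 JOIN t2 ON t1.id = t2.id", read="mysql"
    #   )
    #   assert simple.args.get("tables") is None and multi.args.get("tables")
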
    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)

        return this

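    # Editor's note -- an illustrative sketch, not part of the original source: the
    # leading-FROM branch above is what makes DuckDB's FROM-first syntax work:
    #
    #   import sqlglot
    #
    #   ast = sqlglot.parse_one("FROM tbl", read="duckdb")
    #   # parsed as SELECT * FROM tbl
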
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

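    # Editor's note -- an illustrative sketch, not part of the original source:
    # _parse_with hangs the CTE list off the statement that follows it (see
    # _parse_select above), so the With node lives on the Select:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ast = sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
    #   assert isinstance(ast, exp.Select) and isinstance(ast.args["with"], exp.With)
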
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

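    # Editor's note -- an illustrative sketch, not part of the original source:
    # _parse_join stores method/side/kind as plain strings on exp.Join:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ast = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id")
    #   join = ast.find(exp.Join)
    #   assert join is not None and join.side == "LEFT" and join.args.get("on")
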
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema) or ""

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

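    # Editor's note -- an illustrative sketch, not part of the original source: the
    # dot loop above assigns the trailing parts to table/db/catalog in turn:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   table = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
    #   assert table.catalog == "c" and table.db == "d" and table.name == "t"
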
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

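    # Editor's note -- an illustrative sketch, not part of the original source:
    # _parse_unnest normalizes aliasing across dialects; in UNNEST_COLUMN_ONLY
    # dialects such as BigQuery the alias names the column rather than the table:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ast = sqlglot.parse_one("SELECT * FROM UNNEST([1, 2]) AS x", read="bigquery")
    #   unnest = ast.find(exp.Unnest)
    #   assert unnest is not None and unnest.args["alias"].args["columns"]
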
    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

"NULLS"): 2968 include_nulls = True 2969 elif self._match_text_seq("EXCLUDE", "NULLS"): 2970 include_nulls = False 2971 else: 2972 return None 2973 2974 expressions = [] 2975 field = None 2976 2977 if not self._match(TokenType.L_PAREN): 2978 self._retreat(index) 2979 return None 2980 2981 if unpivot: 2982 expressions = self._parse_csv(self._parse_column) 2983 else: 2984 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2985 2986 if not expressions: 2987 self.raise_error("Failed to parse PIVOT's aggregation list") 2988 2989 if not self._match(TokenType.FOR): 2990 self.raise_error("Expecting FOR") 2991 2992 value = self._parse_column() 2993 2994 if not self._match(TokenType.IN): 2995 self.raise_error("Expecting IN") 2996 2997 field = self._parse_in(value, alias=True) 2998 2999 self._match_r_paren() 3000 3001 pivot = self.expression( 3002 exp.Pivot, 3003 expressions=expressions, 3004 field=field, 3005 unpivot=unpivot, 3006 include_nulls=include_nulls, 3007 ) 3008 3009 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3010 pivot.set("alias", self._parse_table_alias()) 3011 3012 if not unpivot: 3013 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3014 3015 columns: t.List[exp.Expression] = [] 3016 for fld in pivot.args["field"].expressions: 3017 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3018 for name in names: 3019 if self.PREFIXED_PIVOT_COLUMNS: 3020 name = f"{name}_{field_name}" if name else field_name 3021 else: 3022 name = f"{field_name}_{name}" if name else field_name 3023 3024 columns.append(exp.to_identifier(name)) 3025 3026 pivot.set("columns", columns) 3027 3028 return pivot 3029 3030 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3031 return [agg.alias for agg in aggregations] 3032 3033 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3034 if not skip_where_token and not self._match(TokenType.WHERE): 3035 return None 3036 3037 return self.expression( 3038 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3039 ) 3040 3041 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3042 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3043 return None 3044 3045 elements = defaultdict(list) 3046 3047 if self._match(TokenType.ALL): 3048 return self.expression(exp.Group, all=True) 3049 3050 while True: 3051 expressions = self._parse_csv(self._parse_conjunction) 3052 if expressions: 3053 elements["expressions"].extend(expressions) 3054 3055 grouping_sets = self._parse_grouping_sets() 3056 if grouping_sets: 3057 elements["grouping_sets"].extend(grouping_sets) 3058 3059 rollup = None 3060 cube = None 3061 totals = None 3062 3063 index = self._index 3064 with_ = self._match(TokenType.WITH) 3065 if self._match(TokenType.ROLLUP): 3066 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3067 elements["rollup"].extend(ensure_list(rollup)) 3068 3069 if self._match(TokenType.CUBE): 3070 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3071 elements["cube"].extend(ensure_list(cube)) 3072 3073 if self._match_text_seq("TOTALS"): 3074 totals = True 3075 elements["totals"] = True # type: ignore 3076 3077 if not (grouping_sets or rollup or cube or totals): 3078 if with_: 3079 self._retreat(index) 3080 break 3081 3082 return self.expression(exp.Group, **elements) # type: ignore 3083 3084 def _parse_grouping_sets(self) -> 
    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        this = parse_method() if parse_method else self._parse_conjunction()

        # ASC is the default direction, so we only need to consume the token
        self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

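    # Editor's note -- an illustrative sketch, not part of the original source:
    # when NULLS FIRST/LAST isn't spelled out, _parse_ordered derives nulls_first
    # from the dialect's NULL_ORDERING so transpilation can re-emit it explicitly:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ordered = sqlglot.parse_one("SELECT a FROM t ORDER BY a").find(exp.Ordered)
    #   # default dialect uses "nulls_are_small", so ascending implies NULLS FIRST
    #   assert ordered is not None and ordered.args.get("nulls_first") is True
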
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            comments=self._prev.comments,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(
                self._parse_select(nested=True, parse_set_operation=False)
            ),
        )

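    # Editor's note -- an illustrative sketch, not part of the original source:
    # UNION/EXCEPT/INTERSECT parse right-recursively, and DISTINCT is assumed
    # unless ALL is given explicitly:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   union = sqlglot.parse_one("SELECT 1 UNION SELECT 2 UNION ALL SELECT 3")
    #   assert isinstance(union, exp.Union) and union.args["distinct"]
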
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

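    # Editor's note -- an illustrative sketch, not part of the original source:
    # _parse_is maps null-safe comparisons onto dedicated nodes: IS DISTINCT FROM
    # becomes exp.NullSafeNEQ, and IS NOT DISTINCT FROM its NullSafeEQ counterpart:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ast = sqlglot.parse_one("SELECT a IS DISTINCT FROM b FROM t")
    #   assert ast.find(exp.NullSafeNEQ) is not None
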
    def _parse_interval(self) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        if self.EXPONENT:
            factor = self._parse_tokens(self._parse_exponent, self.FACTOR)
        else:
            factor = self._parse_tokens(self._parse_unary, self.FACTOR)
        if isinstance(factor, exp.Div):
            factor.args["typed"] = self.dialect.TYPED_DIVISION
            factor.args["safe"] = self.dialect.SAFE_DIVISION
        return factor

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True
        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)
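_parse_interval canonicalizes INTERVAL '5 day' into the INTERVAL '5' day shape targeted by the comment above, and _parse_types builds nested DataType trees for parameterized and bracketed types. A hedged sketch against the public API; the printed string comes from a recent sqlglot and may differ slightly by version:

import sqlglot
from sqlglot import exp

# The quoted quantity is split into a string literal plus a unit Var.
print(sqlglot.parse_one("INTERVAL '5 day'").sql())  # e.g. INTERVAL '5' day

# Nested types recurse through _parse_types; a trailing [] wraps into ARRAY.
cast = sqlglot.parse_one("CAST(x AS MAP<TEXT, INT>)", read="spark")
assert cast.to.this == exp.DataType.Type.MAP
assert sqlglot.parse_one("CAST(x AS INT[])", read="postgres").to.this == exp.DataType.Type.ARRAY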
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
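The {fn ...} handling above means ODBC-style escape wrappers are consumed at parse time and simply disappear on generation. A small sketch (exact output text may vary by version):

import sqlglot

print(sqlglot.transpile("SELECT {fn CONCAT('a', 'b')}")[0])  # SELECT CONCAT('a', 'b')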
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
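Function names that _parse_function_call cannot resolve fall through to the exp.Anonymous branch above, preserving the original name and arguments. Sketch:

import sqlglot
from sqlglot import exp

func = sqlglot.parse_one("MY_UDF(1, 2)")
assert isinstance(func, exp.Anonymous) and func.this == "MY_UDF"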
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
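_parse_schema and _parse_column_def cooperate to turn a CREATE TABLE column list into Schema/ColumnDef nodes with constraint children. Sketch (assumes a recent sqlglot):

import sqlglot
from sqlglot import exp

ddl = sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL)")
column = ddl.find(exp.ColumnDef)
assert column.name == "x" and column.find(exp.NotNullColumnConstraint) is not None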
    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)
    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint:
        self._match(TokenType.TIMESTAMP_SNAPSHOT)

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )
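_parse_foreign_key stores each ON DELETE / ON UPDATE action under the corresponding lower-cased kwarg of exp.ForeignKey. Sketch (assumes a recent sqlglot):

import sqlglot
from sqlglot import exp

ddl = sqlglot.parse_one(
    "CREATE TABLE t (a INT, FOREIGN KEY (a) REFERENCES p (b) ON DELETE CASCADE)"
)
fk = ddl.find(exp.ForeignKey)
assert fk is not None and fk.args.get("delete") == "CASCADE"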
    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )
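_parse_case collects each WHEN/THEN pair as an exp.If inside Case.ifs and the ELSE branch in Case.default. Sketch:

import sqlglot
from sqlglot import exp

case = sqlglot.parse_one("CASE WHEN a THEN 1 WHEN b THEN 2 ELSE 3 END")
assert isinstance(case, exp.Case)
assert len(case.args["ifs"]) == 2 and case.args["default"] is not None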
    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )
    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)
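Because STRING_AGG normalizes to exp.GroupConcat and the multi-argument DECODE is rewritten into a CASE, both transpile to dialects that lack them. A hedged sketch; the exact output strings below come from a recent release and may differ in yours:

import sqlglot

print(sqlglot.transpile("SELECT STRING_AGG(x, ',') FROM t", read="postgres", write="mysql")[0])
# SELECT GROUP_CONCAT(x SEPARATOR ',') FROM t

print(sqlglot.transpile("SELECT DECODE(x, 1, 'one', 'other') FROM t", read="oracle")[0])
# SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END FROM t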
    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)
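_parse_substring folds the Postgres FROM/FOR form into ordinary positional arguments, so it generates cleanly for dialects that only take the comma form. Sketch (output text from a recent version):

import sqlglot

print(sqlglot.transpile("SELECT SUBSTRING('abc' FROM 2 FOR 1)", read="postgres", write="duckdb")[0])
# SELECT SUBSTRING('abc', 2, 1)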
    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.STRING, TokenType.RAW_STRING)):
            return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_TOKENS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()
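_parse_window above stores the frame boundaries produced by _parse_window_spec on an exp.WindowSpec. Sketch:

import sqlglot
from sqlglot import exp

sql = (
    "SELECT SUM(x) OVER (PARTITION BY y ORDER BY z "
    "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
)
spec = sqlglot.parse_one(sql).find(exp.Window).args["spec"]
assert spec.args["start"] == "UNBOUNDED" and spec.args["end"] == "CURRENT ROW"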
    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )
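_parse_tokens keeps matching operator tokens in a loop, so binary chains nest left-associatively. Sketch:

import sqlglot
from sqlglot import exp

# 1 + 2 + 3 parses as (1 + 2) + 3.
ast = sqlglot.parse_one("1 + 2 + 3")
assert isinstance(ast, exp.Add) and isinstance(ast.this, exp.Add)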
    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )
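COMMIT and ROLLBACK statements route through _parse_commit_or_rollback above, which keys off the previously matched token. Sketch:

import sqlglot
from sqlglot import exp

assert isinstance(sqlglot.parse_one("COMMIT"), exp.Commit)
assert isinstance(sqlglot.parse_one("ROLLBACK"), exp.Rollback)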
self._parse_id_var() 5104 5105 if self._match_text_seq("CHECK"): 5106 expression = self._parse_wrapped(self._parse_conjunction) 5107 enforced = self._match_text_seq("ENFORCED") 5108 5109 return self.expression( 5110 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5111 ) 5112 5113 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5114 expression = self._parse_foreign_key() 5115 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5116 expression = self._parse_primary_key() 5117 else: 5118 expression = None 5119 5120 return self.expression(exp.AddConstraint, this=this, expression=expression) 5121 5122 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5123 index = self._index - 1 5124 5125 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5126 return self._parse_csv(self._parse_add_constraint) 5127 5128 self._retreat(index) 5129 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5130 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5131 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5132 5133 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5134 self._match(TokenType.COLUMN) 5135 column = self._parse_field(any_token=True) 5136 5137 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5138 return self.expression(exp.AlterColumn, this=column, drop=True) 5139 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5140 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5141 5142 self._match_text_seq("SET", "DATA") 5143 return self.expression( 5144 exp.AlterColumn, 5145 this=column, 5146 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5147 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5148 using=self._match(TokenType.USING) and self._parse_conjunction(), 5149 ) 5150 5151 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5152 index = self._index - 1 5153 5154 partition_exists = self._parse_exists() 5155 if self._match(TokenType.PARTITION, advance=False): 5156 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5157 5158 self._retreat(index) 5159 return self._parse_csv(self._parse_drop_column) 5160 5161 def _parse_alter_table_rename(self) -> exp.RenameTable: 5162 self._match_text_seq("TO") 5163 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5164 5165 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5166 start = self._prev 5167 5168 if not self._match(TokenType.TABLE): 5169 return self._parse_as_command(start) 5170 5171 exists = self._parse_exists() 5172 only = self._match_text_seq("ONLY") 5173 this = self._parse_table(schema=True) 5174 5175 if self._next: 5176 self._advance() 5177 5178 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5179 if parser: 5180 actions = ensure_list(parser(self)) 5181 5182 if not self._curr: 5183 return self.expression( 5184 exp.AlterTable, 5185 this=this, 5186 exists=exists, 5187 actions=actions, 5188 only=only, 5189 ) 5190 5191 return self._parse_as_command(start) 5192 5193 def _parse_merge(self) -> exp.Merge: 5194 self._match(TokenType.INTO) 5195 target = self._parse_table() 5196 5197 if target and self._match(TokenType.ALIAS, advance=False): 5198 target.set("alias", self._parse_table_alias()) 5199 5200 self._match(TokenType.USING) 5201 using = self._parse_table() 5202 5203 self._match(TokenType.ON) 5204 on = self._parse_conjunction() 
5205 5206 return self.expression( 5207 exp.Merge, 5208 this=target, 5209 using=using, 5210 on=on, 5211 expressions=self._parse_when_matched(), 5212 ) 5213 5214 def _parse_when_matched(self) -> t.List[exp.When]: 5215 whens = [] 5216 5217 while self._match(TokenType.WHEN): 5218 matched = not self._match(TokenType.NOT) 5219 self._match_text_seq("MATCHED") 5220 source = ( 5221 False 5222 if self._match_text_seq("BY", "TARGET") 5223 else self._match_text_seq("BY", "SOURCE") 5224 ) 5225 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5226 5227 self._match(TokenType.THEN) 5228 5229 if self._match(TokenType.INSERT): 5230 _this = self._parse_star() 5231 if _this: 5232 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5233 else: 5234 then = self.expression( 5235 exp.Insert, 5236 this=self._parse_value(), 5237 expression=self._match(TokenType.VALUES) and self._parse_value(), 5238 ) 5239 elif self._match(TokenType.UPDATE): 5240 expressions = self._parse_star() 5241 if expressions: 5242 then = self.expression(exp.Update, expressions=expressions) 5243 else: 5244 then = self.expression( 5245 exp.Update, 5246 expressions=self._match(TokenType.SET) 5247 and self._parse_csv(self._parse_equality), 5248 ) 5249 elif self._match(TokenType.DELETE): 5250 then = self.expression(exp.Var, this=self._prev.text) 5251 else: 5252 then = None 5253 5254 whens.append( 5255 self.expression( 5256 exp.When, 5257 matched=matched, 5258 source=source, 5259 condition=condition, 5260 then=then, 5261 ) 5262 ) 5263 return whens 5264 5265 def _parse_show(self) -> t.Optional[exp.Expression]: 5266 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5267 if parser: 5268 return parser(self) 5269 return self._parse_as_command(self._prev) 5270 5271 def _parse_set_item_assignment( 5272 self, kind: t.Optional[str] = None 5273 ) -> t.Optional[exp.Expression]: 5274 index = self._index 5275 5276 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5277 return self._parse_set_transaction(global_=kind == "GLOBAL") 5278 5279 left = self._parse_primary() or self._parse_id_var() 5280 assignment_delimiter = self._match_texts(("=", "TO")) 5281 5282 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5283 self._retreat(index) 5284 return None 5285 5286 right = self._parse_statement() or self._parse_id_var() 5287 this = self.expression(exp.EQ, this=left, expression=right) 5288 5289 return self.expression(exp.SetItem, this=this, kind=kind) 5290 5291 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5292 self._match_text_seq("TRANSACTION") 5293 characteristics = self._parse_csv( 5294 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5295 ) 5296 return self.expression( 5297 exp.SetItem, 5298 expressions=characteristics, 5299 kind="TRANSACTION", 5300 **{"global": global_}, # type: ignore 5301 ) 5302 5303 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5304 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5305 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5306 5307 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5308 index = self._index 5309 set_ = self.expression( 5310 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5311 ) 5312 5313 if self._curr: 5314 self._retreat(index) 5315 return self._parse_as_command(self._prev) 5316 5317 return set_ 5318 5319 def 
_parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5320 for option in options: 5321 if self._match_text_seq(*option.split(" ")): 5322 return exp.var(option) 5323 return None 5324 5325 def _parse_as_command(self, start: Token) -> exp.Command: 5326 while self._curr: 5327 self._advance() 5328 text = self._find_sql(start, self._prev) 5329 size = len(start.text) 5330 return exp.Command(this=text[:size], expression=text[size:]) 5331 5332 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5333 settings = [] 5334 5335 self._match_l_paren() 5336 kind = self._parse_id_var() 5337 5338 if self._match(TokenType.L_PAREN): 5339 while True: 5340 key = self._parse_id_var() 5341 value = self._parse_primary() 5342 5343 if not key and value is None: 5344 break 5345 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5346 self._match(TokenType.R_PAREN) 5347 5348 self._match_r_paren() 5349 5350 return self.expression( 5351 exp.DictProperty, 5352 this=this, 5353 kind=kind.this if kind else None, 5354 settings=settings, 5355 ) 5356 5357 def _parse_dict_range(self, this: str) -> exp.DictRange: 5358 self._match_l_paren() 5359 has_min = self._match_text_seq("MIN") 5360 if has_min: 5361 min = self._parse_var() or self._parse_primary() 5362 self._match_text_seq("MAX") 5363 max = self._parse_var() or self._parse_primary() 5364 else: 5365 max = self._parse_var() or self._parse_primary() 5366 min = exp.Literal.number(0) 5367 self._match_r_paren() 5368 return self.expression(exp.DictRange, this=this, min=min, max=max) 5369 5370 def _parse_comprehension( 5371 self, this: t.Optional[exp.Expression] 5372 ) -> t.Optional[exp.Comprehension]: 5373 index = self._index 5374 expression = self._parse_column() 5375 if not self._match(TokenType.IN): 5376 self._retreat(index - 1) 5377 return None 5378 iterator = self._parse_column() 5379 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5380 return self.expression( 5381 exp.Comprehension, 5382 this=this, 5383 expression=expression, 5384 iterator=iterator, 5385 condition=condition, 5386 ) 5387 5388 def _find_parser( 5389 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5390 ) -> t.Optional[t.Callable]: 5391 if not self._curr: 5392 return None 5393 5394 index = self._index 5395 this = [] 5396 while True: 5397 # The current token might be multiple words 5398 curr = self._curr.text.upper() 5399 key = curr.split(" ") 5400 this.append(curr) 5401 5402 self._advance() 5403 result, trie = in_trie(trie, key) 5404 if result == TrieResult.FAILED: 5405 break 5406 5407 if result == TrieResult.EXISTS: 5408 subparser = parsers[" ".join(this)] 5409 return subparser 5410 5411 self._retreat(index) 5412 return None 5413 5414 def _match(self, token_type, advance=True, expression=None): 5415 if not self._curr: 5416 return None 5417 5418 if self._curr.token_type == token_type: 5419 if advance: 5420 self._advance() 5421 self._add_comments(expression) 5422 return True 5423 5424 return None 5425 5426 def _match_set(self, types, advance=True): 5427 if not self._curr: 5428 return None 5429 5430 if self._curr.token_type in types: 5431 if advance: 5432 self._advance() 5433 return True 5434 5435 return None 5436 5437 def _match_pair(self, token_type_a, token_type_b, advance=True): 5438 if not self._curr or not self._next: 5439 return None 5440 5441 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5442 if advance: 5443 self._advance(2) 5444 return True 5445 5446 return None 5447 5448 
def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5449 if not self._match(TokenType.L_PAREN, expression=expression): 5450 self.raise_error("Expecting (") 5451 5452 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5453 if not self._match(TokenType.R_PAREN, expression=expression): 5454 self.raise_error("Expecting )") 5455 5456 def _match_texts(self, texts, advance=True): 5457 if self._curr and self._curr.text.upper() in texts: 5458 if advance: 5459 self._advance() 5460 return True 5461 return False 5462 5463 def _match_text_seq(self, *texts, advance=True): 5464 index = self._index 5465 for text in texts: 5466 if self._curr and self._curr.text.upper() == text: 5467 self._advance() 5468 else: 5469 self._retreat(index) 5470 return False 5471 5472 if not advance: 5473 self._retreat(index) 5474 5475 return True 5476 5477 @t.overload 5478 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5479 ... 5480 5481 @t.overload 5482 def _replace_columns_with_dots( 5483 self, this: t.Optional[exp.Expression] 5484 ) -> t.Optional[exp.Expression]: 5485 ... 5486 5487 def _replace_columns_with_dots(self, this): 5488 if isinstance(this, exp.Dot): 5489 exp.replace_children(this, self._replace_columns_with_dots) 5490 elif isinstance(this, exp.Column): 5491 exp.replace_children(this, self._replace_columns_with_dots) 5492 table = this.args.get("table") 5493 this = ( 5494 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5495 ) 5496 5497 return this 5498 5499 def _replace_lambda( 5500 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5501 ) -> t.Optional[exp.Expression]: 5502 if not node: 5503 return node 5504 5505 for column in node.find_all(exp.Column): 5506 if column.parts[0].name in lambda_variables: 5507 dot_or_id = column.to_dot() if column.table else column.this 5508 parent = column.parent 5509 5510 while isinstance(parent, exp.Dot): 5511 if not isinstance(parent.parent, exp.Dot): 5512 parent.replace(dot_or_id) 5513 break 5514 parent = parent.parent 5515 else: 5516 if column is node: 5517 node = dot_or_id 5518 else: 5519 column.replace(dot_or_id) 5520 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
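For example, a minimal sketch of tuning these settings on a hand-constructed parser (normally sqlglot.parse wires this up for you):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"

    # Collect up to 5 errors and raise them together once parsing finishes,
    # instead of raising on the first one (the IMMEDIATE default).
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
    expressions = parser.parse(Tokenizer().tokenize(sql), sql=sql)
    print(expressions[0].sql())  # SELECT a FROM t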
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
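The dialect argument is resolved through Dialect.get_or_raise, so a dialect name, class, or instance is accepted. In practice the high-level API dispatches to the dialect's own Parser subclass; a sketch of both routes:

    import sqlglot
    from sqlglot.parser import Parser

    # Direct construction: the string is resolved via Dialect.get_or_raise.
    parser = Parser(dialect="duckdb")

    # Typical entry point, which picks the duckdb-specific Parser subclass.
    tree = sqlglot.parse_one("SELECT 1", read="duckdb")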
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
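For example, one tree is produced per semicolon-separated statement (a sketch using the base Tokenizer):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    assert len(trees) == 2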
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
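A sketch, assuming exp.Select is among the types registered in this parser's EXPRESSION_PARSERS (unregistered types raise TypeError):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    [select] = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)
    assert isinstance(select, exp.Select)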
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
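Under ErrorLevel.WARN, recorded errors are logged through the module-level "sqlglot" logger instead of raised; a sketch with a deliberately malformed statement (assumed to fail validation):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1 +"  # dangling operator
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql=sql)  # logs rather than raises
    print(len(parser.errors))  # the recorded ParseError(s) remain inspectable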
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error to the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
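With the default ErrorLevel.IMMEDIATE the error is raised right away, carrying the location and highlighted context; a sketch:

    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    try:
        Parser().parse(Tokenizer().tokenize("SELECT 1 +"), sql="SELECT 1 +")
    except ParseError as e:
        first = e.errors[0]
        print(first["line"], first["col"], first["highlight"])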
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
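For instance, building a node through the parser so that comments are attached and validation runs (exp.column and exp.Literal.number are sqlglot's expression helpers):

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    node = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
    print(node.sql())  # a = 1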
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
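A sketch of validation catching a missing mandatory argument; under ErrorLevel.WARN the message is recorded in parser.errors rather than raised (exp.Not is assumed to require a 'this' argument, as in current sqlglot):

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    parser = Parser(error_level=ErrorLevel.WARN)
    parser.validate_expression(exp.Not())  # 'this' is mandatory but missing
    print(parser.errors)  # one recorded "Required keyword" ParseError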