sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.MEDIUMINT, 140 TokenType.UMEDIUMINT, 141 TokenType.FIXEDSTRING, 142 TokenType.FLOAT, 143 TokenType.DOUBLE, 144 TokenType.CHAR, 145 TokenType.NCHAR, 146 TokenType.VARCHAR, 147 TokenType.NVARCHAR, 148 TokenType.TEXT, 149 TokenType.MEDIUMTEXT, 150 TokenType.LONGTEXT, 151 TokenType.MEDIUMBLOB, 152 TokenType.LONGBLOB, 153 TokenType.BINARY, 154 TokenType.VARBINARY, 155 TokenType.JSON, 156 TokenType.JSONB, 157 TokenType.INTERVAL, 158 TokenType.TINYBLOB, 159 TokenType.TINYTEXT, 160 TokenType.TIME, 161 TokenType.TIMETZ, 162 TokenType.TIMESTAMP, 163 TokenType.TIMESTAMP_S, 164 TokenType.TIMESTAMP_MS, 165 TokenType.TIMESTAMP_NS, 166 TokenType.TIMESTAMPTZ, 167 TokenType.TIMESTAMPLTZ, 168 TokenType.DATETIME, 169 TokenType.DATETIME64, 170 TokenType.DATE, 171 TokenType.INT4RANGE, 172 TokenType.INT4MULTIRANGE, 173 TokenType.INT8RANGE, 174 TokenType.INT8MULTIRANGE, 175 TokenType.NUMRANGE, 176 TokenType.NUMMULTIRANGE, 177 TokenType.TSRANGE, 178 TokenType.TSMULTIRANGE, 179 TokenType.TSTZRANGE, 180 TokenType.TSTZMULTIRANGE, 181 TokenType.DATERANGE, 182 TokenType.DATEMULTIRANGE, 183 TokenType.DECIMAL, 184 TokenType.UDECIMAL, 185 TokenType.BIGDECIMAL, 186 TokenType.UUID, 187 TokenType.GEOGRAPHY, 188 TokenType.GEOMETRY, 189 TokenType.HLLSKETCH, 190 TokenType.HSTORE, 191 TokenType.PSEUDO_TYPE, 192 TokenType.SUPER, 193 TokenType.SERIAL, 194 TokenType.SMALLSERIAL, 195 TokenType.BIGSERIAL, 196 TokenType.XML, 197 TokenType.YEAR, 198 TokenType.UNIQUEIDENTIFIER, 199 TokenType.USERDEFINED, 200 TokenType.MONEY, 201 TokenType.SMALLMONEY, 202 TokenType.ROWVERSION, 203 TokenType.IMAGE, 204 TokenType.VARIANT, 205 TokenType.OBJECT, 206 TokenType.OBJECT_IDENTIFIER, 207 TokenType.INET, 208 TokenType.IPADDRESS, 209 TokenType.IPPREFIX, 210 TokenType.UNKNOWN, 211 TokenType.NULL, 212 *ENUM_TYPE_TOKENS, 213 *NESTED_TYPE_TOKENS, 214 } 215 216 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 217 TokenType.BIGINT: TokenType.UBIGINT, 218 TokenType.INT: TokenType.UINT, 219 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 220 TokenType.SMALLINT: TokenType.USMALLINT, 221 TokenType.TINYINT: TokenType.UTINYINT, 222 TokenType.DECIMAL: TokenType.UDECIMAL, 223 } 224 225 SUBQUERY_PREDICATES = { 226 TokenType.ANY: exp.Any, 227 TokenType.ALL: exp.All, 228 TokenType.EXISTS: exp.Exists, 229 TokenType.SOME: exp.Any, 230 } 231 232 RESERVED_KEYWORDS = { 233 *Tokenizer.SINGLE_TOKENS.values(), 234 TokenType.SELECT, 235 } 236 237 DB_CREATABLES = { 238 TokenType.DATABASE, 239 TokenType.SCHEMA, 240 TokenType.TABLE, 241 TokenType.VIEW, 242 TokenType.MODEL, 243 TokenType.DICTIONARY, 244 } 245 246 CREATABLES = { 247 TokenType.COLUMN, 248 TokenType.CONSTRAINT, 249 TokenType.FUNCTION, 250 TokenType.INDEX, 251 TokenType.PROCEDURE, 252 TokenType.FOREIGN_KEY, 253 *DB_CREATABLES, 254 } 255 256 # Tokens that can represent identifiers 257 ID_VAR_TOKENS = { 258 TokenType.VAR, 259 TokenType.ANTI, 260 TokenType.APPLY, 261 TokenType.ASC, 262 TokenType.AUTO_INCREMENT, 263 TokenType.BEGIN, 264 TokenType.CACHE, 265 TokenType.CASE, 266 TokenType.COLLATE, 267 TokenType.COMMAND, 268 TokenType.COMMENT, 269 TokenType.COMMIT, 270 TokenType.CONSTRAINT, 271 TokenType.DEFAULT, 272 TokenType.DELETE, 273 TokenType.DESC, 274 TokenType.DESCRIBE, 275 TokenType.DICTIONARY, 276 TokenType.DIV, 277 TokenType.END, 278 TokenType.EXECUTE, 279 TokenType.ESCAPE, 280 TokenType.FALSE, 281 TokenType.FIRST, 282 TokenType.FILTER, 283 TokenType.FORMAT, 284 TokenType.FULL, 285 TokenType.IS, 286 TokenType.ISNULL, 287 TokenType.INTERVAL, 288 TokenType.KEEP, 289 TokenType.KILL, 290 TokenType.LEFT, 291 TokenType.LOAD, 292 TokenType.MERGE, 293 TokenType.NATURAL, 294 TokenType.NEXT, 295 TokenType.OFFSET, 296 TokenType.OPERATOR, 297 TokenType.ORDINALITY, 298 TokenType.OVERLAPS, 299 TokenType.OVERWRITE, 300 TokenType.PARTITION, 301 TokenType.PERCENT, 302 TokenType.PIVOT, 303 TokenType.PRAGMA, 304 TokenType.RANGE, 305 TokenType.RECURSIVE, 306 TokenType.REFERENCES, 307 TokenType.REFRESH, 308 TokenType.REPLACE, 309 TokenType.RIGHT, 310 TokenType.ROW, 311 TokenType.ROWS, 312 TokenType.SEMI, 313 TokenType.SET, 314 TokenType.SETTINGS, 315 TokenType.SHOW, 316 TokenType.TEMPORARY, 317 TokenType.TOP, 318 TokenType.TRUE, 319 TokenType.UNIQUE, 320 TokenType.UNPIVOT, 321 TokenType.UPDATE, 322 TokenType.USE, 323 TokenType.VOLATILE, 324 TokenType.WINDOW, 325 *CREATABLES, 326 *SUBQUERY_PREDICATES, 327 *TYPE_TOKENS, 328 *NO_PAREN_FUNCTIONS, 329 } 330 331 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 332 333 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 334 TokenType.ANTI, 335 TokenType.APPLY, 336 TokenType.ASOF, 337 TokenType.FULL, 338 TokenType.LEFT, 339 TokenType.LOCK, 340 TokenType.NATURAL, 341 TokenType.OFFSET, 342 TokenType.RIGHT, 343 TokenType.SEMI, 344 TokenType.WINDOW, 345 } 346 347 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 348 349 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 350 351 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 352 353 FUNC_TOKENS = { 354 TokenType.COLLATE, 355 TokenType.COMMAND, 356 TokenType.CURRENT_DATE, 357 TokenType.CURRENT_DATETIME, 358 TokenType.CURRENT_TIMESTAMP, 359 TokenType.CURRENT_TIME, 360 TokenType.CURRENT_USER, 361 TokenType.FILTER, 362 TokenType.FIRST, 363 TokenType.FORMAT, 364 TokenType.GLOB, 365 TokenType.IDENTIFIER, 366 TokenType.INDEX, 367 TokenType.ISNULL, 368 TokenType.ILIKE, 369 TokenType.INSERT, 370 TokenType.LIKE, 371 TokenType.MERGE, 372 TokenType.OFFSET, 373 TokenType.PRIMARY_KEY, 374 TokenType.RANGE, 375 TokenType.REPLACE, 376 TokenType.RLIKE, 377 TokenType.ROW, 378 TokenType.UNNEST, 379 TokenType.VAR, 380 TokenType.LEFT, 381 TokenType.RIGHT, 382 TokenType.DATE, 383 TokenType.DATETIME, 384 TokenType.TABLE, 385 TokenType.TIMESTAMP, 386 TokenType.TIMESTAMPTZ, 387 TokenType.WINDOW, 388 TokenType.XOR, 389 *TYPE_TOKENS, 390 *SUBQUERY_PREDICATES, 391 } 392 393 CONJUNCTION = { 394 TokenType.AND: exp.And, 395 TokenType.OR: exp.Or, 396 } 397 398 EQUALITY = { 399 TokenType.COLON_EQ: exp.PropertyEQ, 400 TokenType.EQ: exp.EQ, 401 TokenType.NEQ: exp.NEQ, 402 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 403 } 404 405 COMPARISON = { 406 TokenType.GT: exp.GT, 407 TokenType.GTE: exp.GTE, 408 TokenType.LT: exp.LT, 409 TokenType.LTE: exp.LTE, 410 } 411 412 BITWISE = { 413 TokenType.AMP: exp.BitwiseAnd, 414 TokenType.CARET: exp.BitwiseXor, 415 TokenType.PIPE: exp.BitwiseOr, 416 } 417 418 TERM = { 419 TokenType.DASH: exp.Sub, 420 TokenType.PLUS: exp.Add, 421 TokenType.MOD: exp.Mod, 422 TokenType.COLLATE: exp.Collate, 423 } 424 425 FACTOR = { 426 TokenType.DIV: exp.IntDiv, 427 TokenType.LR_ARROW: exp.Distance, 428 TokenType.SLASH: exp.Div, 429 TokenType.STAR: exp.Mul, 430 } 431 432 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 433 434 TIMES = { 435 TokenType.TIME, 436 TokenType.TIMETZ, 437 } 438 439 TIMESTAMPS = { 440 TokenType.TIMESTAMP, 441 TokenType.TIMESTAMPTZ, 442 TokenType.TIMESTAMPLTZ, 443 *TIMES, 444 } 445 446 SET_OPERATIONS = { 447 TokenType.UNION, 448 TokenType.INTERSECT, 449 TokenType.EXCEPT, 450 } 451 452 JOIN_METHODS = { 453 TokenType.NATURAL, 454 TokenType.ASOF, 455 } 456 457 JOIN_SIDES = { 458 TokenType.LEFT, 459 TokenType.RIGHT, 460 TokenType.FULL, 461 } 462 463 JOIN_KINDS = { 464 TokenType.INNER, 465 TokenType.OUTER, 466 TokenType.CROSS, 467 TokenType.SEMI, 468 TokenType.ANTI, 469 } 470 471 JOIN_HINTS: t.Set[str] = set() 472 473 LAMBDAS = { 474 TokenType.ARROW: lambda self, expressions: self.expression( 475 exp.Lambda, 476 this=self._replace_lambda( 477 self._parse_conjunction(), 478 {node.name for node in expressions}, 479 ), 480 expressions=expressions, 481 ), 482 TokenType.FARROW: lambda self, expressions: self.expression( 483 exp.Kwarg, 484 this=exp.var(expressions[0].name), 485 expression=self._parse_conjunction(), 486 ), 487 } 488 489 COLUMN_OPERATORS = { 490 TokenType.DOT: None, 491 TokenType.DCOLON: lambda self, this, to: self.expression( 492 exp.Cast if self.STRICT_CAST else exp.TryCast, 493 this=this, 494 to=to, 495 ), 496 TokenType.ARROW: lambda self, this, path: self.expression( 497 exp.JSONExtract, 498 this=this, 499 expression=path, 500 ), 501 TokenType.DARROW: lambda self, this, path: self.expression( 502 exp.JSONExtractScalar, 503 this=this, 504 expression=path, 505 ), 506 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 507 exp.JSONBExtract, 508 this=this, 509 expression=path, 510 ), 511 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 512 exp.JSONBExtractScalar, 513 this=this, 514 expression=path, 515 ), 516 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 517 exp.JSONBContains, 518 this=this, 519 expression=key, 520 ), 521 } 522 523 EXPRESSION_PARSERS = { 524 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 525 exp.Column: lambda self: self._parse_column(), 526 exp.Condition: lambda self: self._parse_conjunction(), 527 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 528 exp.Expression: lambda self: self._parse_statement(), 529 exp.From: lambda self: self._parse_from(), 530 exp.Group: lambda self: self._parse_group(), 531 exp.Having: lambda self: self._parse_having(), 532 exp.Identifier: lambda self: self._parse_id_var(), 533 exp.Join: lambda self: self._parse_join(), 534 exp.Lambda: lambda self: self._parse_lambda(), 535 exp.Lateral: lambda self: self._parse_lateral(), 536 exp.Limit: lambda self: self._parse_limit(), 537 exp.Offset: lambda self: self._parse_offset(), 538 exp.Order: lambda self: self._parse_order(), 539 exp.Ordered: lambda self: self._parse_ordered(), 540 exp.Properties: lambda self: self._parse_properties(), 541 exp.Qualify: lambda self: self._parse_qualify(), 542 exp.Returning: lambda self: self._parse_returning(), 543 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 544 exp.Table: lambda self: self._parse_table_parts(), 545 exp.TableAlias: lambda self: self._parse_table_alias(), 546 exp.Where: lambda self: self._parse_where(), 547 exp.Window: lambda self: self._parse_named_window(), 548 exp.With: lambda self: self._parse_with(), 549 "JOIN_TYPE": lambda self: self._parse_join_parts(), 550 } 551 552 STATEMENT_PARSERS = { 553 TokenType.ALTER: lambda self: self._parse_alter(), 554 TokenType.BEGIN: lambda self: self._parse_transaction(), 555 TokenType.CACHE: lambda self: self._parse_cache(), 556 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 557 TokenType.COMMENT: lambda self: self._parse_comment(), 558 TokenType.CREATE: lambda self: self._parse_create(), 559 TokenType.DELETE: lambda self: self._parse_delete(), 560 TokenType.DESC: lambda self: self._parse_describe(), 561 TokenType.DESCRIBE: lambda self: self._parse_describe(), 562 TokenType.DROP: lambda self: self._parse_drop(), 563 TokenType.INSERT: lambda self: self._parse_insert(), 564 TokenType.KILL: lambda self: self._parse_kill(), 565 TokenType.LOAD: lambda self: self._parse_load(), 566 TokenType.MERGE: lambda self: self._parse_merge(), 567 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 568 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 569 TokenType.REFRESH: lambda self: self._parse_refresh(), 570 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 571 TokenType.SET: lambda self: self._parse_set(), 572 TokenType.UNCACHE: lambda self: self._parse_uncache(), 573 TokenType.UPDATE: lambda self: self._parse_update(), 574 TokenType.USE: lambda self: self.expression( 575 exp.Use, 576 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 577 and exp.var(self._prev.text), 578 this=self._parse_table(schema=False), 579 ), 580 } 581 582 UNARY_PARSERS = { 583 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 584 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 585 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 586 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 587 } 588 589 PRIMARY_PARSERS = { 590 TokenType.STRING: lambda self, token: self.expression( 591 exp.Literal, this=token.text, is_string=True 592 ), 593 TokenType.NUMBER: lambda self, token: self.expression( 594 exp.Literal, this=token.text, is_string=False 595 ), 596 TokenType.STAR: lambda self, _: self.expression( 597 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 598 ), 599 TokenType.NULL: lambda self, _: self.expression(exp.Null), 600 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 601 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 602 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 603 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 604 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 605 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 606 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 607 exp.National, this=token.text 608 ), 609 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 610 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 611 exp.RawString, this=token.text 612 ), 613 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 614 } 615 616 PLACEHOLDER_PARSERS = { 617 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 618 TokenType.PARAMETER: lambda self: self._parse_parameter(), 619 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 620 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 621 else None, 622 } 623 624 RANGE_PARSERS = { 625 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 626 TokenType.GLOB: binary_range_parser(exp.Glob), 627 TokenType.ILIKE: binary_range_parser(exp.ILike), 628 TokenType.IN: lambda self, this: self._parse_in(this), 629 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 630 TokenType.IS: lambda self, this: self._parse_is(this), 631 TokenType.LIKE: binary_range_parser(exp.Like), 632 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 633 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 634 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 635 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 636 } 637 638 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 639 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 640 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 641 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 642 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 643 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 644 "CHECKSUM": lambda self: self._parse_checksum(), 645 "CLUSTER BY": lambda self: self._parse_cluster(), 646 "CLUSTERED": lambda self: self._parse_clustered_by(), 647 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 648 exp.CollateProperty, **kwargs 649 ), 650 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 651 "COPY": lambda self: self._parse_copy_property(), 652 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 653 "DEFINER": lambda self: self._parse_definer(), 654 "DETERMINISTIC": lambda self: self.expression( 655 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 656 ), 657 "DISTKEY": lambda self: self._parse_distkey(), 658 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 659 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 660 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 661 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 662 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 663 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 664 "FREESPACE": lambda self: self._parse_freespace(), 665 "HEAP": lambda self: self.expression(exp.HeapProperty), 666 "IMMUTABLE": lambda self: self.expression( 667 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 668 ), 669 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 670 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 671 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 672 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 673 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 674 "LIKE": lambda self: self._parse_create_like(), 675 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 676 "LOCK": lambda self: self._parse_locking(), 677 "LOCKING": lambda self: self._parse_locking(), 678 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 679 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 680 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 681 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 682 "NO": lambda self: self._parse_no_property(), 683 "ON": lambda self: self._parse_on_property(), 684 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 685 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 686 "PARTITION": lambda self: self._parse_partitioned_of(), 687 "PARTITION BY": lambda self: self._parse_partitioned_by(), 688 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 689 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 690 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 691 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 692 "REMOTE": lambda self: self._parse_remote_with_connection(), 693 "RETURNS": lambda self: self._parse_returns(), 694 "ROW": lambda self: self._parse_row(), 695 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 696 "SAMPLE": lambda self: self.expression( 697 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 698 ), 699 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 700 "SETTINGS": lambda self: self.expression( 701 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 702 ), 703 "SORTKEY": lambda self: self._parse_sortkey(), 704 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 705 "STABLE": lambda self: self.expression( 706 exp.StabilityProperty, this=exp.Literal.string("STABLE") 707 ), 708 "STORED": lambda self: self._parse_stored(), 709 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 710 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 711 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 712 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 713 "TO": lambda self: self._parse_to_table(), 714 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 715 "TRANSFORM": lambda self: self.expression( 716 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 717 ), 718 "TTL": lambda self: self._parse_ttl(), 719 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 720 "VOLATILE": lambda self: self._parse_volatile_property(), 721 "WITH": lambda self: self._parse_with_property(), 722 } 723 724 CONSTRAINT_PARSERS = { 725 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 726 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 727 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 728 "CHARACTER SET": lambda self: self.expression( 729 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 730 ), 731 "CHECK": lambda self: self.expression( 732 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 733 ), 734 "COLLATE": lambda self: self.expression( 735 exp.CollateColumnConstraint, this=self._parse_var() 736 ), 737 "COMMENT": lambda self: self.expression( 738 exp.CommentColumnConstraint, this=self._parse_string() 739 ), 740 "COMPRESS": lambda self: self._parse_compress(), 741 "CLUSTERED": lambda self: self.expression( 742 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 743 ), 744 "NONCLUSTERED": lambda self: self.expression( 745 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 746 ), 747 "DEFAULT": lambda self: self.expression( 748 exp.DefaultColumnConstraint, this=self._parse_bitwise() 749 ), 750 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 751 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 752 "FORMAT": lambda self: self.expression( 753 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 754 ), 755 "GENERATED": lambda self: self._parse_generated_as_identity(), 756 "IDENTITY": lambda self: self._parse_auto_increment(), 757 "INLINE": lambda self: self._parse_inline(), 758 "LIKE": lambda self: self._parse_create_like(), 759 "NOT": lambda self: self._parse_not_constraint(), 760 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 761 "ON": lambda self: ( 762 self._match(TokenType.UPDATE) 763 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 764 ) 765 or self.expression(exp.OnProperty, this=self._parse_id_var()), 766 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 767 "PERIOD": lambda self: self._parse_period_for_system_time(), 768 "PRIMARY KEY": lambda self: self._parse_primary_key(), 769 "REFERENCES": lambda self: self._parse_references(match=False), 770 "TITLE": lambda self: self.expression( 771 exp.TitleColumnConstraint, this=self._parse_var_or_string() 772 ), 773 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 774 "UNIQUE": lambda self: self._parse_unique(), 775 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 776 "WITH": lambda self: self.expression( 777 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 778 ), 779 } 780 781 ALTER_PARSERS = { 782 "ADD": lambda self: self._parse_alter_table_add(), 783 "ALTER": lambda self: self._parse_alter_table_alter(), 784 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 785 "DROP": lambda self: self._parse_alter_table_drop(), 786 "RENAME": lambda self: self._parse_alter_table_rename(), 787 } 788 789 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 790 791 NO_PAREN_FUNCTION_PARSERS = { 792 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 793 "CASE": lambda self: self._parse_case(), 794 "IF": lambda self: self._parse_if(), 795 "NEXT": lambda self: self._parse_next_value_for(), 796 } 797 798 INVALID_FUNC_NAME_TOKENS = { 799 TokenType.IDENTIFIER, 800 TokenType.STRING, 801 } 802 803 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 804 805 FUNCTION_PARSERS = { 806 "ANY_VALUE": lambda self: self._parse_any_value(), 807 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 808 "CONCAT": lambda self: self._parse_concat(), 809 "CONCAT_WS": lambda self: self._parse_concat_ws(), 810 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 811 "DECODE": lambda self: self._parse_decode(), 812 "EXTRACT": lambda self: self._parse_extract(), 813 "JSON_OBJECT": lambda self: self._parse_json_object(), 814 "JSON_TABLE": lambda self: self._parse_json_table(), 815 "LOG": lambda self: self._parse_logarithm(), 816 "MATCH": lambda self: self._parse_match_against(), 817 "OPENJSON": lambda self: self._parse_open_json(), 818 "POSITION": lambda self: self._parse_position(), 819 "PREDICT": lambda self: self._parse_predict(), 820 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 821 "STRING_AGG": lambda self: self._parse_string_agg(), 822 "SUBSTRING": lambda self: self._parse_substring(), 823 "TRIM": lambda self: self._parse_trim(), 824 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 825 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 826 } 827 828 QUERY_MODIFIER_PARSERS = { 829 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 830 TokenType.WHERE: lambda self: ("where", self._parse_where()), 831 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 832 TokenType.HAVING: lambda self: ("having", self._parse_having()), 833 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 834 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 835 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 836 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 837 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 838 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 839 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 840 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 841 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 842 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 843 TokenType.CLUSTER_BY: lambda self: ( 844 "cluster", 845 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 846 ), 847 TokenType.DISTRIBUTE_BY: lambda self: ( 848 "distribute", 849 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 850 ), 851 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 852 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 853 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 854 } 855 856 SET_PARSERS = { 857 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 858 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 859 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 860 "TRANSACTION": lambda self: self._parse_set_transaction(), 861 } 862 863 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 864 865 TYPE_LITERAL_PARSERS = { 866 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 867 } 868 869 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 870 871 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 872 873 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 874 875 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 876 TRANSACTION_CHARACTERISTICS = { 877 "ISOLATION LEVEL REPEATABLE READ", 878 "ISOLATION LEVEL READ COMMITTED", 879 "ISOLATION LEVEL READ UNCOMMITTED", 880 "ISOLATION LEVEL SERIALIZABLE", 881 "READ WRITE", 882 "READ ONLY", 883 } 884 885 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 886 887 CLONE_KEYWORDS = {"CLONE", "COPY"} 888 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 889 890 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 891 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 892 893 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 894 895 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 896 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 897 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 898 899 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 900 901 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 902 903 DISTINCT_TOKENS = {TokenType.DISTINCT} 904 905 NULL_TOKENS = {TokenType.NULL} 906 907 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 908 909 STRICT_CAST = True 910 911 # A NULL arg in CONCAT yields NULL by default 912 CONCAT_NULL_OUTPUTS_STRING = False 913 914 PREFIXED_PIVOT_COLUMNS = False 915 IDENTIFY_PIVOT_STRINGS = False 916 917 LOG_BASE_FIRST = True 918 LOG_DEFAULTS_TO_LN = False 919 920 # Whether or not ADD is present for each column added by ALTER TABLE 921 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 922 923 # Whether or not the table sample clause expects CSV syntax 924 TABLESAMPLE_CSV = False 925 926 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 927 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 928 929 # Whether the TRIM function expects the characters to trim as its first argument 930 TRIM_PATTERN_FIRST = False 931 932 # Whether the behavior of a / b depends on the types of a and b. 933 # False means a / b is always float division. 934 # True means a / b is integer division if both a and b are integers. 935 TYPED_DIVISION = False 936 937 # False means 1 / 0 throws an error. 938 # True means 1 / 0 returns null. 939 SAFE_DIVISION = False 940 941 __slots__ = ( 942 "error_level", 943 "error_message_context", 944 "max_errors", 945 "sql", 946 "errors", 947 "_tokens", 948 "_index", 949 "_curr", 950 "_next", 951 "_prev", 952 "_prev_comments", 953 "_tokenizer", 954 ) 955 956 # Autofilled 957 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 958 INDEX_OFFSET: int = 0 959 UNNEST_COLUMN_ONLY: bool = False 960 ALIAS_POST_TABLESAMPLE: bool = False 961 STRICT_STRING_CONCAT = False 962 DPIPE_IS_STRING_CONCAT = True 963 SUPPORTS_USER_DEFINED_TYPES = True 964 NORMALIZE_FUNCTIONS = "upper" 965 NULL_ORDERING: str = "nulls_are_small" 966 SHOW_TRIE: t.Dict = {} 967 SET_TRIE: t.Dict = {} 968 FORMAT_MAPPING: t.Dict[str, str] = {} 969 FORMAT_TRIE: t.Dict = {} 970 TIME_MAPPING: t.Dict[str, str] = {} 971 TIME_TRIE: t.Dict = {} 972 973 def __init__( 974 self, 975 error_level: t.Optional[ErrorLevel] = None, 976 error_message_context: int = 100, 977 max_errors: int = 3, 978 ): 979 self.error_level = error_level or ErrorLevel.IMMEDIATE 980 self.error_message_context = error_message_context 981 self.max_errors = max_errors 982 self._tokenizer = self.TOKENIZER_CLASS() 983 self.reset() 984 985 def reset(self): 986 self.sql = "" 987 self.errors = [] 988 self._tokens = [] 989 self._index = 0 990 self._curr = None 991 self._next = None 992 self._prev = None 993 self._prev_comments = None 994 995 def parse( 996 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 997 ) -> t.List[t.Optional[exp.Expression]]: 998 """ 999 Parses a list of tokens and returns a list of syntax trees, one tree 1000 per parsed SQL statement. 1001 1002 Args: 1003 raw_tokens: The list of tokens. 1004 sql: The original SQL string, used to produce helpful debug messages. 1005 1006 Returns: 1007 The list of the produced syntax trees. 1008 """ 1009 return self._parse( 1010 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1011 ) 1012 1013 def parse_into( 1014 self, 1015 expression_types: exp.IntoType, 1016 raw_tokens: t.List[Token], 1017 sql: t.Optional[str] = None, 1018 ) -> t.List[t.Optional[exp.Expression]]: 1019 """ 1020 Parses a list of tokens into a given Expression type. If a collection of Expression 1021 types is given instead, this method will try to parse the token list into each one 1022 of them, stopping at the first for which the parsing succeeds. 1023 1024 Args: 1025 expression_types: The expression type(s) to try and parse the token list into. 1026 raw_tokens: The list of tokens. 1027 sql: The original SQL string, used to produce helpful debug messages. 1028 1029 Returns: 1030 The target Expression. 1031 """ 1032 errors = [] 1033 for expression_type in ensure_list(expression_types): 1034 parser = self.EXPRESSION_PARSERS.get(expression_type) 1035 if not parser: 1036 raise TypeError(f"No parser registered for {expression_type}") 1037 1038 try: 1039 return self._parse(parser, raw_tokens, sql) 1040 except ParseError as e: 1041 e.errors[0]["into_expression"] = expression_type 1042 errors.append(e) 1043 1044 raise ParseError( 1045 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1046 errors=merge_errors(errors), 1047 ) from errors[-1] 1048 1049 def _parse( 1050 self, 1051 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1052 raw_tokens: t.List[Token], 1053 sql: t.Optional[str] = None, 1054 ) -> t.List[t.Optional[exp.Expression]]: 1055 self.reset() 1056 self.sql = sql or "" 1057 1058 total = len(raw_tokens) 1059 chunks: t.List[t.List[Token]] = [[]] 1060 1061 for i, token in enumerate(raw_tokens): 1062 if token.token_type == TokenType.SEMICOLON: 1063 if i < total - 1: 1064 chunks.append([]) 1065 else: 1066 chunks[-1].append(token) 1067 1068 expressions = [] 1069 1070 for tokens in chunks: 1071 self._index = -1 1072 self._tokens = tokens 1073 self._advance() 1074 1075 expressions.append(parse_method(self)) 1076 1077 if self._index < len(self._tokens): 1078 self.raise_error("Invalid expression / Unexpected token") 1079 1080 self.check_errors() 1081 1082 return expressions 1083 1084 def check_errors(self) -> None: 1085 """Logs or raises any found errors, depending on the chosen error level setting.""" 1086 if self.error_level == ErrorLevel.WARN: 1087 for error in self.errors: 1088 logger.error(str(error)) 1089 elif self.error_level == ErrorLevel.RAISE and self.errors: 1090 raise ParseError( 1091 concat_messages(self.errors, self.max_errors), 1092 errors=merge_errors(self.errors), 1093 ) 1094 1095 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1096 """ 1097 Appends an error in the list of recorded errors or raises it, depending on the chosen 1098 error level setting. 1099 """ 1100 token = token or self._curr or self._prev or Token.string("") 1101 start = token.start 1102 end = token.end + 1 1103 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1104 highlight = self.sql[start:end] 1105 end_context = self.sql[end : end + self.error_message_context] 1106 1107 error = ParseError.new( 1108 f"{message}. Line {token.line}, Col: {token.col}.\n" 1109 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1110 description=message, 1111 line=token.line, 1112 col=token.col, 1113 start_context=start_context, 1114 highlight=highlight, 1115 end_context=end_context, 1116 ) 1117 1118 if self.error_level == ErrorLevel.IMMEDIATE: 1119 raise error 1120 1121 self.errors.append(error) 1122 1123 def expression( 1124 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1125 ) -> E: 1126 """ 1127 Creates a new, validated Expression. 1128 1129 Args: 1130 exp_class: The expression class to instantiate. 1131 comments: An optional list of comments to attach to the expression. 1132 kwargs: The arguments to set for the expression along with their respective values. 1133 1134 Returns: 1135 The target expression. 1136 """ 1137 instance = exp_class(**kwargs) 1138 instance.add_comments(comments) if comments else self._add_comments(instance) 1139 return self.validate_expression(instance) 1140 1141 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1142 if expression and self._prev_comments: 1143 expression.add_comments(self._prev_comments) 1144 self._prev_comments = None 1145 1146 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1147 """ 1148 Validates an Expression, making sure that all its mandatory arguments are set. 1149 1150 Args: 1151 expression: The expression to validate. 1152 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1153 1154 Returns: 1155 The validated expression. 1156 """ 1157 if self.error_level != ErrorLevel.IGNORE: 1158 for error_message in expression.error_messages(args): 1159 self.raise_error(error_message) 1160 1161 return expression 1162 1163 def _find_sql(self, start: Token, end: Token) -> str: 1164 return self.sql[start.start : end.end + 1] 1165 1166 def _advance(self, times: int = 1) -> None: 1167 self._index += times 1168 self._curr = seq_get(self._tokens, self._index) 1169 self._next = seq_get(self._tokens, self._index + 1) 1170 1171 if self._index > 0: 1172 self._prev = self._tokens[self._index - 1] 1173 self._prev_comments = self._prev.comments 1174 else: 1175 self._prev = None 1176 self._prev_comments = None 1177 1178 def _retreat(self, index: int) -> None: 1179 if index != self._index: 1180 self._advance(index - self._index) 1181 1182 def _parse_command(self) -> exp.Command: 1183 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1184 1185 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1186 start = self._prev 1187 exists = self._parse_exists() if allow_exists else None 1188 1189 self._match(TokenType.ON) 1190 1191 kind = self._match_set(self.CREATABLES) and self._prev 1192 if not kind: 1193 return self._parse_as_command(start) 1194 1195 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1196 this = self._parse_user_defined_function(kind=kind.token_type) 1197 elif kind.token_type == TokenType.TABLE: 1198 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1199 elif kind.token_type == TokenType.COLUMN: 1200 this = self._parse_column() 1201 else: 1202 this = self._parse_id_var() 1203 1204 self._match(TokenType.IS) 1205 1206 return self.expression( 1207 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1208 ) 1209 1210 def _parse_to_table( 1211 self, 1212 ) -> exp.ToTableProperty: 1213 table = self._parse_table_parts(schema=True) 1214 return self.expression(exp.ToTableProperty, this=table) 1215 1216 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1217 def _parse_ttl(self) -> exp.Expression: 1218 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1219 this = self._parse_bitwise() 1220 1221 if self._match_text_seq("DELETE"): 1222 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1223 if self._match_text_seq("RECOMPRESS"): 1224 return self.expression( 1225 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1226 ) 1227 if self._match_text_seq("TO", "DISK"): 1228 return self.expression( 1229 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1230 ) 1231 if self._match_text_seq("TO", "VOLUME"): 1232 return self.expression( 1233 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1234 ) 1235 1236 return this 1237 1238 expressions = self._parse_csv(_parse_ttl_action) 1239 where = self._parse_where() 1240 group = self._parse_group() 1241 1242 aggregates = None 1243 if group and self._match(TokenType.SET): 1244 aggregates = self._parse_csv(self._parse_set_item) 1245 1246 return self.expression( 1247 exp.MergeTreeTTL, 1248 expressions=expressions, 1249 where=where, 1250 group=group, 1251 aggregates=aggregates, 1252 ) 1253 1254 def _parse_statement(self) -> t.Optional[exp.Expression]: 1255 if self._curr is None: 1256 return None 1257 1258 if self._match_set(self.STATEMENT_PARSERS): 1259 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1260 1261 if self._match_set(Tokenizer.COMMANDS): 1262 return self._parse_command() 1263 1264 expression = self._parse_expression() 1265 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1266 return self._parse_query_modifiers(expression) 1267 1268 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1269 start = self._prev 1270 temporary = self._match(TokenType.TEMPORARY) 1271 materialized = self._match_text_seq("MATERIALIZED") 1272 1273 kind = self._match_set(self.CREATABLES) and self._prev.text 1274 if not kind: 1275 return self._parse_as_command(start) 1276 1277 return self.expression( 1278 exp.Drop, 1279 comments=start.comments, 1280 exists=exists or self._parse_exists(), 1281 this=self._parse_table(schema=True), 1282 kind=kind, 1283 temporary=temporary, 1284 materialized=materialized, 1285 cascade=self._match_text_seq("CASCADE"), 1286 constraints=self._match_text_seq("CONSTRAINTS"), 1287 purge=self._match_text_seq("PURGE"), 1288 ) 1289 1290 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1291 return ( 1292 self._match_text_seq("IF") 1293 and (not not_ or self._match(TokenType.NOT)) 1294 and self._match(TokenType.EXISTS) 1295 ) 1296 1297 def _parse_create(self) -> exp.Create | exp.Command: 1298 # Note: this can't be None because we've matched a statement parser 1299 start = self._prev 1300 comments = self._prev_comments 1301 1302 replace = start.text.upper() == "REPLACE" or self._match_pair( 1303 TokenType.OR, TokenType.REPLACE 1304 ) 1305 unique = self._match(TokenType.UNIQUE) 1306 1307 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1308 self._advance() 1309 1310 properties = None 1311 create_token = self._match_set(self.CREATABLES) and self._prev 1312 1313 if not create_token: 1314 # exp.Properties.Location.POST_CREATE 1315 properties = self._parse_properties() 1316 create_token = self._match_set(self.CREATABLES) and self._prev 1317 1318 if not properties or not create_token: 1319 return self._parse_as_command(start) 1320 1321 exists = self._parse_exists(not_=True) 1322 this = None 1323 expression: t.Optional[exp.Expression] = None 1324 indexes = None 1325 no_schema_binding = None 1326 begin = None 1327 end = None 1328 clone = None 1329 1330 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1331 nonlocal properties 1332 if properties and temp_props: 1333 properties.expressions.extend(temp_props.expressions) 1334 elif temp_props: 1335 properties = temp_props 1336 1337 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1338 this = self._parse_user_defined_function(kind=create_token.token_type) 1339 1340 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1341 extend_props(self._parse_properties()) 1342 1343 self._match(TokenType.ALIAS) 1344 1345 if self._match(TokenType.COMMAND): 1346 expression = self._parse_as_command(self._prev) 1347 else: 1348 begin = self._match(TokenType.BEGIN) 1349 return_ = self._match_text_seq("RETURN") 1350 1351 if self._match(TokenType.STRING, advance=False): 1352 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1353 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1354 expression = self._parse_string() 1355 extend_props(self._parse_properties()) 1356 else: 1357 expression = self._parse_statement() 1358 1359 end = self._match_text_seq("END") 1360 1361 if return_: 1362 expression = self.expression(exp.Return, this=expression) 1363 elif create_token.token_type == TokenType.INDEX: 1364 this = self._parse_index(index=self._parse_id_var()) 1365 elif create_token.token_type in self.DB_CREATABLES: 1366 table_parts = self._parse_table_parts(schema=True) 1367 1368 # exp.Properties.Location.POST_NAME 1369 self._match(TokenType.COMMA) 1370 extend_props(self._parse_properties(before=True)) 1371 1372 this = self._parse_schema(this=table_parts) 1373 1374 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1375 extend_props(self._parse_properties()) 1376 1377 self._match(TokenType.ALIAS) 1378 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1379 # exp.Properties.Location.POST_ALIAS 1380 extend_props(self._parse_properties()) 1381 1382 expression = self._parse_ddl_select() 1383 1384 if create_token.token_type == TokenType.TABLE: 1385 # exp.Properties.Location.POST_EXPRESSION 1386 extend_props(self._parse_properties()) 1387 1388 indexes = [] 1389 while True: 1390 index = self._parse_index() 1391 1392 # exp.Properties.Location.POST_INDEX 1393 extend_props(self._parse_properties()) 1394 1395 if not index: 1396 break 1397 else: 1398 self._match(TokenType.COMMA) 1399 indexes.append(index) 1400 elif create_token.token_type == TokenType.VIEW: 1401 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1402 no_schema_binding = True 1403 1404 shallow = self._match_text_seq("SHALLOW") 1405 1406 if self._match_texts(self.CLONE_KEYWORDS): 1407 copy = self._prev.text.lower() == "copy" 1408 clone = self._parse_table(schema=True) 1409 when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper() 1410 clone_kind = ( 1411 self._match(TokenType.L_PAREN) 1412 and self._match_texts(self.CLONE_KINDS) 1413 and self._prev.text.upper() 1414 ) 1415 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1416 self._match(TokenType.R_PAREN) 1417 clone = self.expression( 1418 exp.Clone, 1419 this=clone, 1420 when=when, 1421 kind=clone_kind, 1422 shallow=shallow, 1423 expression=clone_expression, 1424 copy=copy, 1425 ) 1426 1427 return self.expression( 1428 exp.Create, 1429 comments=comments, 1430 this=this, 1431 kind=create_token.text, 1432 replace=replace, 1433 unique=unique, 1434 expression=expression, 1435 exists=exists, 1436 properties=properties, 1437 indexes=indexes, 1438 no_schema_binding=no_schema_binding, 1439 begin=begin, 1440 end=end, 1441 clone=clone, 1442 ) 1443 1444 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1445 # only used for teradata currently 1446 self._match(TokenType.COMMA) 1447 1448 kwargs = { 1449 "no": self._match_text_seq("NO"), 1450 "dual": self._match_text_seq("DUAL"), 1451 "before": self._match_text_seq("BEFORE"), 1452 "default": self._match_text_seq("DEFAULT"), 1453 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1454 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1455 "after": self._match_text_seq("AFTER"), 1456 "minimum": self._match_texts(("MIN", "MINIMUM")), 1457 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1458 } 1459 1460 if self._match_texts(self.PROPERTY_PARSERS): 1461 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1462 try: 1463 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1464 except TypeError: 1465 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1466 1467 return None 1468 1469 def _parse_property(self) -> t.Optional[exp.Expression]: 1470 if self._match_texts(self.PROPERTY_PARSERS): 1471 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1472 1473 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1474 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1475 1476 if self._match_text_seq("COMPOUND", "SORTKEY"): 1477 return self._parse_sortkey(compound=True) 1478 1479 if self._match_text_seq("SQL", "SECURITY"): 1480 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1481 1482 index = self._index 1483 key = self._parse_column() 1484 1485 if not self._match(TokenType.EQ): 1486 self._retreat(index) 1487 return None 1488 1489 return self.expression( 1490 exp.Property, 1491 this=key.to_dot() if isinstance(key, exp.Column) else key, 1492 value=self._parse_column() or self._parse_var(any_token=True), 1493 ) 1494 1495 def _parse_stored(self) -> exp.FileFormatProperty: 1496 self._match(TokenType.ALIAS) 1497 1498 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1499 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1500 1501 return self.expression( 1502 exp.FileFormatProperty, 1503 this=self.expression( 1504 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1505 ) 1506 if input_format or output_format 1507 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1508 ) 1509 1510 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1511 self._match(TokenType.EQ) 1512 self._match(TokenType.ALIAS) 1513 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1514 1515 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1516 properties = [] 1517 while True: 1518 if before: 1519 prop = self._parse_property_before() 1520 else: 1521 prop = self._parse_property() 1522 1523 if not prop: 1524 break 1525 for p in ensure_list(prop): 1526 properties.append(p) 1527 1528 if properties: 1529 return self.expression(exp.Properties, expressions=properties) 1530 1531 return None 1532 1533 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1534 return self.expression( 1535 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1536 ) 1537 1538 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1539 if self._index >= 2: 1540 pre_volatile_token = self._tokens[self._index - 2] 1541 else: 1542 pre_volatile_token = None 1543 1544 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1545 return exp.VolatileProperty() 1546 1547 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1548 1549 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1550 self._match_pair(TokenType.EQ, TokenType.ON) 1551 1552 prop = self.expression(exp.WithSystemVersioningProperty) 1553 if self._match(TokenType.L_PAREN): 1554 self._match_text_seq("HISTORY_TABLE", "=") 1555 prop.set("this", self._parse_table_parts()) 1556 1557 if self._match(TokenType.COMMA): 1558 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1559 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1560 1561 self._match_r_paren() 1562 1563 return prop 1564 1565 def _parse_with_property( 1566 self, 1567 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1568 if self._match(TokenType.L_PAREN, advance=False): 1569 return self._parse_wrapped_csv(self._parse_property) 1570 1571 if self._match_text_seq("JOURNAL"): 1572 return self._parse_withjournaltable() 1573 1574 if self._match_text_seq("DATA"): 1575 return self._parse_withdata(no=False) 1576 elif self._match_text_seq("NO", "DATA"): 1577 return self._parse_withdata(no=True) 1578 1579 if not self._next: 1580 return None 1581 1582 return self._parse_withisolatedloading() 1583 1584 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1585 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1586 self._match(TokenType.EQ) 1587 1588 user = self._parse_id_var() 1589 self._match(TokenType.PARAMETER) 1590 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1591 1592 if not user or not host: 1593 return None 1594 1595 return exp.DefinerProperty(this=f"{user}@{host}") 1596 1597 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1598 self._match(TokenType.TABLE) 1599 self._match(TokenType.EQ) 1600 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1601 1602 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1603 return self.expression(exp.LogProperty, no=no) 1604 1605 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1606 return self.expression(exp.JournalProperty, **kwargs) 1607 1608 def _parse_checksum(self) -> exp.ChecksumProperty: 1609 self._match(TokenType.EQ) 1610 1611 on = None 1612 if self._match(TokenType.ON): 1613 on = True 1614 elif self._match_text_seq("OFF"): 1615 on = False 1616 1617 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1618 1619 def _parse_cluster(self) -> exp.Cluster: 1620 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1621 1622 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1623 self._match_text_seq("BY") 1624 1625 self._match_l_paren() 1626 expressions = self._parse_csv(self._parse_column) 1627 self._match_r_paren() 1628 1629 if self._match_text_seq("SORTED", "BY"): 1630 self._match_l_paren() 1631 sorted_by = self._parse_csv(self._parse_ordered) 1632 self._match_r_paren() 1633 else: 1634 sorted_by = None 1635 1636 self._match(TokenType.INTO) 1637 buckets = self._parse_number() 1638 self._match_text_seq("BUCKETS") 1639 1640 return self.expression( 1641 exp.ClusteredByProperty, 1642 expressions=expressions, 1643 sorted_by=sorted_by, 1644 buckets=buckets, 1645 ) 1646 1647 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1648 if not self._match_text_seq("GRANTS"): 1649 self._retreat(self._index - 1) 1650 return None 1651 1652 return self.expression(exp.CopyGrantsProperty) 1653 1654 def _parse_freespace(self) -> exp.FreespaceProperty: 1655 self._match(TokenType.EQ) 1656 return self.expression( 1657 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1658 ) 1659 1660 def _parse_mergeblockratio( 1661 self, no: bool = False, default: bool = False 1662 ) -> exp.MergeBlockRatioProperty: 1663 if self._match(TokenType.EQ): 1664 return self.expression( 1665 exp.MergeBlockRatioProperty, 1666 this=self._parse_number(), 1667 percent=self._match(TokenType.PERCENT), 1668 ) 1669 1670 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1671 1672 def _parse_datablocksize( 1673 self, 1674 default: t.Optional[bool] = None, 1675 minimum: t.Optional[bool] = None, 1676 maximum: t.Optional[bool] = None, 1677 ) -> exp.DataBlocksizeProperty: 1678 self._match(TokenType.EQ) 1679 size = self._parse_number() 1680 1681 units = None 1682 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1683 units = self._prev.text 1684 1685 return self.expression( 1686 exp.DataBlocksizeProperty, 1687 size=size, 1688 units=units, 1689 default=default, 1690 minimum=minimum, 1691 maximum=maximum, 1692 ) 1693 1694 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1695 self._match(TokenType.EQ) 1696 always = self._match_text_seq("ALWAYS") 1697 manual = self._match_text_seq("MANUAL") 1698 never = self._match_text_seq("NEVER") 1699 default = self._match_text_seq("DEFAULT") 1700 1701 autotemp = None 1702 if self._match_text_seq("AUTOTEMP"): 1703 autotemp = self._parse_schema() 1704 1705 return self.expression( 1706 exp.BlockCompressionProperty, 1707 always=always, 1708 manual=manual, 1709 never=never, 1710 default=default, 1711 autotemp=autotemp, 1712 ) 1713 1714 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1715 no = self._match_text_seq("NO") 1716 concurrent = self._match_text_seq("CONCURRENT") 1717 self._match_text_seq("ISOLATED", "LOADING") 1718 for_all = self._match_text_seq("FOR", "ALL") 1719 for_insert = self._match_text_seq("FOR", "INSERT") 1720 for_none = self._match_text_seq("FOR", "NONE") 1721 return self.expression( 1722 exp.IsolatedLoadingProperty, 1723 no=no, 1724 concurrent=concurrent, 1725 for_all=for_all, 1726 for_insert=for_insert, 1727 for_none=for_none, 1728 ) 1729 1730 def _parse_locking(self) -> exp.LockingProperty: 1731 if self._match(TokenType.TABLE): 1732 kind = "TABLE" 1733 elif self._match(TokenType.VIEW): 1734 kind = "VIEW" 1735 elif self._match(TokenType.ROW): 1736 kind = "ROW" 1737 elif self._match_text_seq("DATABASE"): 1738 kind = "DATABASE" 1739 else: 1740 kind = None 1741 1742 if kind in ("DATABASE", "TABLE", "VIEW"): 1743 this = self._parse_table_parts() 1744 else: 1745 this = None 1746 1747 if self._match(TokenType.FOR): 1748 for_or_in = "FOR" 1749 elif self._match(TokenType.IN): 1750 for_or_in = "IN" 1751 else: 1752 for_or_in = None 1753 1754 if self._match_text_seq("ACCESS"): 1755 lock_type = "ACCESS" 1756 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1757 lock_type = "EXCLUSIVE" 1758 elif self._match_text_seq("SHARE"): 1759 lock_type = "SHARE" 1760 elif self._match_text_seq("READ"): 1761 lock_type = "READ" 1762 elif self._match_text_seq("WRITE"): 1763 lock_type = "WRITE" 1764 elif self._match_text_seq("CHECKSUM"): 1765 lock_type = "CHECKSUM" 1766 else: 1767 lock_type = None 1768 1769 override = self._match_text_seq("OVERRIDE") 1770 1771 return self.expression( 1772 exp.LockingProperty, 1773 this=this, 1774 kind=kind, 1775 for_or_in=for_or_in, 1776 lock_type=lock_type, 1777 override=override, 1778 ) 1779 1780 def _parse_partition_by(self) -> t.List[exp.Expression]: 1781 if self._match(TokenType.PARTITION_BY): 1782 return self._parse_csv(self._parse_conjunction) 1783 return [] 1784 1785 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1786 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1787 if self._match_text_seq("MINVALUE"): 1788 return exp.var("MINVALUE") 1789 if self._match_text_seq("MAXVALUE"): 1790 return exp.var("MAXVALUE") 1791 return self._parse_bitwise() 1792 1793 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1794 expression = None 1795 from_expressions = None 1796 to_expressions = None 1797 1798 if self._match(TokenType.IN): 1799 this = self._parse_wrapped_csv(self._parse_bitwise) 1800 elif self._match(TokenType.FROM): 1801 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1802 self._match_text_seq("TO") 1803 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1804 elif self._match_text_seq("WITH", "(", "MODULUS"): 1805 this = self._parse_number() 1806 self._match_text_seq(",", "REMAINDER") 1807 expression = self._parse_number() 1808 self._match_r_paren() 1809 else: 1810 self.raise_error("Failed to parse partition bound spec.") 1811 1812 return self.expression( 1813 exp.PartitionBoundSpec, 1814 this=this, 1815 expression=expression, 1816 from_expressions=from_expressions, 1817 to_expressions=to_expressions, 1818 ) 1819 1820 # https://www.postgresql.org/docs/current/sql-createtable.html 1821 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1822 if not self._match_text_seq("OF"): 1823 self._retreat(self._index - 1) 1824 return None 1825 1826 this = self._parse_table(schema=True) 1827 1828 if self._match(TokenType.DEFAULT): 1829 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1830 elif self._match_text_seq("FOR", "VALUES"): 1831 expression = self._parse_partition_bound_spec() 1832 else: 1833 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1834 1835 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1836 1837 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1838 self._match(TokenType.EQ) 1839 return self.expression( 1840 exp.PartitionedByProperty, 1841 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1842 ) 1843 1844 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1845 if self._match_text_seq("AND", "STATISTICS"): 1846 statistics = True 1847 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1848 statistics = False 1849 else: 1850 statistics = None 1851 1852 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1853 1854 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1855 if self._match_text_seq("PRIMARY", "INDEX"): 1856 return exp.NoPrimaryIndexProperty() 1857 return None 1858 1859 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1860 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1861 return exp.OnCommitProperty() 1862 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1863 return exp.OnCommitProperty(delete=True) 1864 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1865 1866 def _parse_distkey(self) -> exp.DistKeyProperty: 1867 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1868 1869 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1870 table = self._parse_table(schema=True) 1871 1872 options = [] 1873 while self._match_texts(("INCLUDING", "EXCLUDING")): 1874 this = self._prev.text.upper() 1875 1876 id_var = self._parse_id_var() 1877 if not id_var: 1878 return None 1879 1880 options.append( 1881 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1882 ) 1883 1884 return self.expression(exp.LikeProperty, this=table, expressions=options) 1885 1886 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1887 return self.expression( 1888 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1889 ) 1890 1891 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1892 self._match(TokenType.EQ) 1893 return self.expression( 1894 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1895 ) 1896 1897 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1898 self._match_text_seq("WITH", "CONNECTION") 1899 return self.expression( 1900 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1901 ) 1902 1903 def _parse_returns(self) -> exp.ReturnsProperty: 1904 value: t.Optional[exp.Expression] 1905 is_table = self._match(TokenType.TABLE) 1906 1907 if is_table: 1908 if self._match(TokenType.LT): 1909 value = self.expression( 1910 exp.Schema, 1911 this="TABLE", 1912 expressions=self._parse_csv(self._parse_struct_types), 1913 ) 1914 if not self._match(TokenType.GT): 1915 self.raise_error("Expecting >") 1916 else: 1917 value = self._parse_schema(exp.var("TABLE")) 1918 else: 1919 value = self._parse_types() 1920 1921 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1922 1923 def _parse_describe(self) -> exp.Describe: 1924 kind = self._match_set(self.CREATABLES) and self._prev.text 1925 this = self._parse_table(schema=True) 1926 properties = self._parse_properties() 1927 expressions = properties.expressions if properties else None 1928 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1929 1930 def _parse_insert(self) -> exp.Insert: 1931 comments = ensure_list(self._prev_comments) 1932 overwrite = self._match(TokenType.OVERWRITE) 1933 ignore = self._match(TokenType.IGNORE) 1934 local = self._match_text_seq("LOCAL") 1935 alternative = None 1936 1937 if self._match_text_seq("DIRECTORY"): 1938 this: t.Optional[exp.Expression] = self.expression( 1939 exp.Directory, 1940 this=self._parse_var_or_string(), 1941 local=local, 1942 row_format=self._parse_row_format(match_row=True), 1943 ) 1944 else: 1945 if self._match(TokenType.OR): 1946 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1947 1948 self._match(TokenType.INTO) 1949 comments += ensure_list(self._prev_comments) 1950 self._match(TokenType.TABLE) 1951 this = self._parse_table(schema=True) 1952 1953 returning = self._parse_returning() 1954 1955 return self.expression( 1956 exp.Insert, 1957 comments=comments, 1958 this=this, 1959 by_name=self._match_text_seq("BY", "NAME"), 1960 exists=self._parse_exists(), 1961 partition=self._parse_partition(), 1962 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1963 and self._parse_conjunction(), 1964 expression=self._parse_ddl_select(), 1965 conflict=self._parse_on_conflict(), 1966 returning=returning or self._parse_returning(), 1967 overwrite=overwrite, 1968 alternative=alternative, 1969 ignore=ignore, 1970 ) 1971 1972 def _parse_kill(self) -> exp.Kill: 1973 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1974 1975 return self.expression( 1976 exp.Kill, 1977 this=self._parse_primary(), 1978 kind=kind, 1979 ) 1980 1981 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1982 conflict = self._match_text_seq("ON", "CONFLICT") 1983 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1984 1985 if not conflict and not duplicate: 1986 return None 1987 1988 nothing = None 1989 expressions = None 1990 key = None 1991 constraint = None 1992 1993 if conflict: 1994 if self._match_text_seq("ON", "CONSTRAINT"): 1995 constraint = self._parse_id_var() 1996 else: 1997 key = self._parse_csv(self._parse_value) 1998 1999 self._match_text_seq("DO") 2000 if self._match_text_seq("NOTHING"): 2001 nothing = True 2002 else: 2003 self._match(TokenType.UPDATE) 2004 self._match(TokenType.SET) 2005 expressions = self._parse_csv(self._parse_equality) 2006 2007 return self.expression( 2008 exp.OnConflict, 2009 duplicate=duplicate, 2010 expressions=expressions, 2011 nothing=nothing, 2012 key=key, 2013 constraint=constraint, 2014 ) 2015 2016 def _parse_returning(self) -> t.Optional[exp.Returning]: 2017 if not self._match(TokenType.RETURNING): 2018 return None 2019 return self.expression( 2020 exp.Returning, 2021 expressions=self._parse_csv(self._parse_expression), 2022 into=self._match(TokenType.INTO) and self._parse_table_part(), 2023 ) 2024 2025 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2026 if not self._match(TokenType.FORMAT): 2027 return None 2028 return self._parse_row_format() 2029 2030 def _parse_row_format( 2031 self, match_row: bool = False 2032 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2033 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2034 return None 2035 2036 if self._match_text_seq("SERDE"): 2037 this = self._parse_string() 2038 2039 serde_properties = None 2040 if self._match(TokenType.SERDE_PROPERTIES): 2041 serde_properties = self.expression( 2042 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2043 ) 2044 2045 return self.expression( 2046 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2047 ) 2048 2049 self._match_text_seq("DELIMITED") 2050 2051 kwargs = {} 2052 2053 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2054 kwargs["fields"] = self._parse_string() 2055 if self._match_text_seq("ESCAPED", "BY"): 2056 kwargs["escaped"] = self._parse_string() 2057 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2058 kwargs["collection_items"] = self._parse_string() 2059 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2060 kwargs["map_keys"] = self._parse_string() 2061 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2062 kwargs["lines"] = self._parse_string() 2063 if self._match_text_seq("NULL", "DEFINED", "AS"): 2064 kwargs["null"] = self._parse_string() 2065 2066 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2067 2068 def _parse_load(self) -> exp.LoadData | exp.Command: 2069 if self._match_text_seq("DATA"): 2070 local = self._match_text_seq("LOCAL") 2071 self._match_text_seq("INPATH") 2072 inpath = self._parse_string() 2073 overwrite = self._match(TokenType.OVERWRITE) 2074 self._match_pair(TokenType.INTO, TokenType.TABLE) 2075 2076 return self.expression( 2077 exp.LoadData, 2078 this=self._parse_table(schema=True), 2079 local=local, 2080 overwrite=overwrite, 2081 inpath=inpath, 2082 partition=self._parse_partition(), 2083 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2084 serde=self._match_text_seq("SERDE") and self._parse_string(), 2085 ) 2086 return self._parse_as_command(self._prev) 2087 2088 def _parse_delete(self) -> exp.Delete: 2089 # This handles MySQL's "Multiple-Table Syntax" 2090 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2091 tables = None 2092 comments = self._prev_comments 2093 if not self._match(TokenType.FROM, advance=False): 2094 tables = self._parse_csv(self._parse_table) or None 2095 2096 returning = self._parse_returning() 2097 2098 return self.expression( 2099 exp.Delete, 2100 comments=comments, 2101 tables=tables, 2102 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2103 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2104 where=self._parse_where(), 2105 returning=returning or self._parse_returning(), 2106 limit=self._parse_limit(), 2107 ) 2108 2109 def _parse_update(self) -> exp.Update: 2110 comments = self._prev_comments 2111 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2112 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2113 returning = self._parse_returning() 2114 return self.expression( 2115 exp.Update, 2116 comments=comments, 2117 **{ # type: ignore 2118 "this": this, 2119 "expressions": expressions, 2120 "from": self._parse_from(joins=True), 2121 "where": self._parse_where(), 2122 "returning": returning or self._parse_returning(), 2123 "order": self._parse_order(), 2124 "limit": self._parse_limit(), 2125 }, 2126 ) 2127 2128 def _parse_uncache(self) -> exp.Uncache: 2129 if not self._match(TokenType.TABLE): 2130 self.raise_error("Expecting TABLE after UNCACHE") 2131 2132 return self.expression( 2133 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2134 ) 2135 2136 def _parse_cache(self) -> exp.Cache: 2137 lazy = self._match_text_seq("LAZY") 2138 self._match(TokenType.TABLE) 2139 table = self._parse_table(schema=True) 2140 2141 options = [] 2142 if self._match_text_seq("OPTIONS"): 2143 self._match_l_paren() 2144 k = self._parse_string() 2145 self._match(TokenType.EQ) 2146 v = self._parse_string() 2147 options = [k, v] 2148 self._match_r_paren() 2149 2150 self._match(TokenType.ALIAS) 2151 return self.expression( 2152 exp.Cache, 2153 this=table, 2154 lazy=lazy, 2155 options=options, 2156 expression=self._parse_select(nested=True), 2157 ) 2158 2159 def _parse_partition(self) -> t.Optional[exp.Partition]: 2160 if not self._match(TokenType.PARTITION): 2161 return None 2162 2163 return self.expression( 2164 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2165 ) 2166 2167 def _parse_value(self) -> exp.Tuple: 2168 if self._match(TokenType.L_PAREN): 2169 expressions = self._parse_csv(self._parse_conjunction) 2170 self._match_r_paren() 2171 return self.expression(exp.Tuple, expressions=expressions) 2172 2173 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2174 # https://prestodb.io/docs/current/sql/values.html 2175 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2176 2177 def _parse_projections(self) -> t.List[exp.Expression]: 2178 return self._parse_expressions() 2179 2180 def _parse_select( 2181 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2182 ) -> t.Optional[exp.Expression]: 2183 cte = self._parse_with() 2184 2185 if cte: 2186 this = self._parse_statement() 2187 2188 if not this: 2189 self.raise_error("Failed to parse any statement following CTE") 2190 return cte 2191 2192 if "with" in this.arg_types: 2193 this.set("with", cte) 2194 else: 2195 self.raise_error(f"{this.key} does not support CTE") 2196 this = cte 2197 2198 return this 2199 2200 # duckdb supports leading with FROM x 2201 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2202 2203 if self._match(TokenType.SELECT): 2204 comments = self._prev_comments 2205 2206 hint = self._parse_hint() 2207 all_ = self._match(TokenType.ALL) 2208 distinct = self._match_set(self.DISTINCT_TOKENS) 2209 2210 kind = ( 2211 self._match(TokenType.ALIAS) 2212 and self._match_texts(("STRUCT", "VALUE")) 2213 and self._prev.text 2214 ) 2215 2216 if distinct: 2217 distinct = self.expression( 2218 exp.Distinct, 2219 on=self._parse_value() if self._match(TokenType.ON) else None, 2220 ) 2221 2222 if all_ and distinct: 2223 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2224 2225 limit = self._parse_limit(top=True) 2226 projections = self._parse_projections() 2227 2228 this = self.expression( 2229 exp.Select, 2230 kind=kind, 2231 hint=hint, 2232 distinct=distinct, 2233 expressions=projections, 2234 limit=limit, 2235 ) 2236 this.comments = comments 2237 2238 into = self._parse_into() 2239 if into: 2240 this.set("into", into) 2241 2242 if not from_: 2243 from_ = self._parse_from() 2244 2245 if from_: 2246 this.set("from", from_) 2247 2248 this = self._parse_query_modifiers(this) 2249 elif (table or nested) and self._match(TokenType.L_PAREN): 2250 if self._match(TokenType.PIVOT): 2251 this = self._parse_simplified_pivot() 2252 elif self._match(TokenType.FROM): 2253 this = exp.select("*").from_( 2254 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2255 ) 2256 else: 2257 this = self._parse_table() if table else self._parse_select(nested=True) 2258 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2259 2260 self._match_r_paren() 2261 2262 # We return early here so that the UNION isn't attached to the subquery by the 2263 # following call to _parse_set_operations, but instead becomes the parent node 2264 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2265 elif self._match(TokenType.VALUES): 2266 this = self.expression( 2267 exp.Values, 2268 expressions=self._parse_csv(self._parse_value), 2269 alias=self._parse_table_alias(), 2270 ) 2271 elif from_: 2272 this = exp.select("*").from_(from_.this, copy=False) 2273 else: 2274 this = None 2275 2276 return self._parse_set_operations(this) 2277 2278 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2279 if not skip_with_token and not self._match(TokenType.WITH): 2280 return None 2281 2282 comments = self._prev_comments 2283 recursive = self._match(TokenType.RECURSIVE) 2284 2285 expressions = [] 2286 while True: 2287 expressions.append(self._parse_cte()) 2288 2289 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2290 break 2291 else: 2292 self._match(TokenType.WITH) 2293 2294 return self.expression( 2295 exp.With, comments=comments, expressions=expressions, recursive=recursive 2296 ) 2297 2298 def _parse_cte(self) -> exp.CTE: 2299 alias = self._parse_table_alias() 2300 if not alias or not alias.this: 2301 self.raise_error("Expected CTE to have alias") 2302 2303 self._match(TokenType.ALIAS) 2304 return self.expression( 2305 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2306 ) 2307 2308 def _parse_table_alias( 2309 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2310 ) -> t.Optional[exp.TableAlias]: 2311 any_token = self._match(TokenType.ALIAS) 2312 alias = ( 2313 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2314 or self._parse_string_as_identifier() 2315 ) 2316 2317 index = self._index 2318 if self._match(TokenType.L_PAREN): 2319 columns = self._parse_csv(self._parse_function_parameter) 2320 self._match_r_paren() if columns else self._retreat(index) 2321 else: 2322 columns = None 2323 2324 if not alias and not columns: 2325 return None 2326 2327 return self.expression(exp.TableAlias, this=alias, columns=columns) 2328 2329 def _parse_subquery( 2330 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2331 ) -> t.Optional[exp.Subquery]: 2332 if not this: 2333 return None 2334 2335 return self.expression( 2336 exp.Subquery, 2337 this=this, 2338 pivots=self._parse_pivots(), 2339 alias=self._parse_table_alias() if parse_alias else None, 2340 ) 2341 2342 def _parse_query_modifiers( 2343 self, this: t.Optional[exp.Expression] 2344 ) -> t.Optional[exp.Expression]: 2345 if isinstance(this, self.MODIFIABLES): 2346 for join in iter(self._parse_join, None): 2347 this.append("joins", join) 2348 for lateral in iter(self._parse_lateral, None): 2349 this.append("laterals", lateral) 2350 2351 while True: 2352 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2353 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2354 key, expression = parser(self) 2355 2356 if expression: 2357 this.set(key, expression) 2358 if key == "limit": 2359 offset = expression.args.pop("offset", None) 2360 if offset: 2361 this.set("offset", exp.Offset(expression=offset)) 2362 continue 2363 break 2364 return this 2365 2366 def _parse_hint(self) -> t.Optional[exp.Hint]: 2367 if self._match(TokenType.HINT): 2368 hints = [] 2369 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2370 hints.extend(hint) 2371 2372 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2373 self.raise_error("Expected */ after HINT") 2374 2375 return self.expression(exp.Hint, expressions=hints) 2376 2377 return None 2378 2379 def _parse_into(self) -> t.Optional[exp.Into]: 2380 if not self._match(TokenType.INTO): 2381 return None 2382 2383 temp = self._match(TokenType.TEMPORARY) 2384 unlogged = self._match_text_seq("UNLOGGED") 2385 self._match(TokenType.TABLE) 2386 2387 return self.expression( 2388 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2389 ) 2390 2391 def _parse_from( 2392 self, joins: bool = False, skip_from_token: bool = False 2393 ) -> t.Optional[exp.From]: 2394 if not skip_from_token and not self._match(TokenType.FROM): 2395 return None 2396 2397 return self.expression( 2398 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2399 ) 2400 2401 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2402 if not self._match(TokenType.MATCH_RECOGNIZE): 2403 return None 2404 2405 self._match_l_paren() 2406 2407 partition = self._parse_partition_by() 2408 order = self._parse_order() 2409 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2410 2411 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2412 rows = exp.var("ONE ROW PER MATCH") 2413 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2414 text = "ALL ROWS PER MATCH" 2415 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2416 text += f" SHOW EMPTY MATCHES" 2417 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2418 text += f" OMIT EMPTY MATCHES" 2419 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2420 text += f" WITH UNMATCHED ROWS" 2421 rows = exp.var(text) 2422 else: 2423 rows = None 2424 2425 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2426 text = "AFTER MATCH SKIP" 2427 if self._match_text_seq("PAST", "LAST", "ROW"): 2428 text += f" PAST LAST ROW" 2429 elif self._match_text_seq("TO", "NEXT", "ROW"): 2430 text += f" TO NEXT ROW" 2431 elif self._match_text_seq("TO", "FIRST"): 2432 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2433 elif self._match_text_seq("TO", "LAST"): 2434 text += f" TO LAST {self._advance_any().text}" # type: ignore 2435 after = exp.var(text) 2436 else: 2437 after = None 2438 2439 if self._match_text_seq("PATTERN"): 2440 self._match_l_paren() 2441 2442 if not self._curr: 2443 self.raise_error("Expecting )", self._curr) 2444 2445 paren = 1 2446 start = self._curr 2447 2448 while self._curr and paren > 0: 2449 if self._curr.token_type == TokenType.L_PAREN: 2450 paren += 1 2451 if self._curr.token_type == TokenType.R_PAREN: 2452 paren -= 1 2453 2454 end = self._prev 2455 self._advance() 2456 2457 if paren > 0: 2458 self.raise_error("Expecting )", self._curr) 2459 2460 pattern = exp.var(self._find_sql(start, end)) 2461 else: 2462 pattern = None 2463 2464 define = ( 2465 self._parse_csv( 2466 lambda: self.expression( 2467 exp.Alias, 2468 alias=self._parse_id_var(any_token=True), 2469 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2470 ) 2471 ) 2472 if self._match_text_seq("DEFINE") 2473 else None 2474 ) 2475 2476 self._match_r_paren() 2477 2478 return self.expression( 2479 exp.MatchRecognize, 2480 partition_by=partition, 2481 order=order, 2482 measures=measures, 2483 rows=rows, 2484 after=after, 2485 pattern=pattern, 2486 define=define, 2487 alias=self._parse_table_alias(), 2488 ) 2489 2490 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2491 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2492 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2493 2494 if outer_apply or cross_apply: 2495 this = self._parse_select(table=True) 2496 view = None 2497 outer = not cross_apply 2498 elif self._match(TokenType.LATERAL): 2499 this = self._parse_select(table=True) 2500 view = self._match(TokenType.VIEW) 2501 outer = self._match(TokenType.OUTER) 2502 else: 2503 return None 2504 2505 if not this: 2506 this = ( 2507 self._parse_unnest() 2508 or self._parse_function() 2509 or self._parse_id_var(any_token=False) 2510 ) 2511 2512 while self._match(TokenType.DOT): 2513 this = exp.Dot( 2514 this=this, 2515 expression=self._parse_function() or self._parse_id_var(any_token=False), 2516 ) 2517 2518 if view: 2519 table = self._parse_id_var(any_token=False) 2520 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2521 table_alias: t.Optional[exp.TableAlias] = self.expression( 2522 exp.TableAlias, this=table, columns=columns 2523 ) 2524 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2525 # We move the alias from the lateral's child node to the lateral itself 2526 table_alias = this.args["alias"].pop() 2527 else: 2528 table_alias = self._parse_table_alias() 2529 2530 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2531 2532 def _parse_join_parts( 2533 self, 2534 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2535 return ( 2536 self._match_set(self.JOIN_METHODS) and self._prev, 2537 self._match_set(self.JOIN_SIDES) and self._prev, 2538 self._match_set(self.JOIN_KINDS) and self._prev, 2539 ) 2540 2541 def _parse_join( 2542 self, skip_join_token: bool = False, parse_bracket: bool = False 2543 ) -> t.Optional[exp.Join]: 2544 if self._match(TokenType.COMMA): 2545 return self.expression(exp.Join, this=self._parse_table()) 2546 2547 index = self._index 2548 method, side, kind = self._parse_join_parts() 2549 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2550 join = self._match(TokenType.JOIN) 2551 2552 if not skip_join_token and not join: 2553 self._retreat(index) 2554 kind = None 2555 method = None 2556 side = None 2557 2558 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2559 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2560 2561 if not skip_join_token and not join and not outer_apply and not cross_apply: 2562 return None 2563 2564 if outer_apply: 2565 side = Token(TokenType.LEFT, "LEFT") 2566 2567 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2568 2569 if method: 2570 kwargs["method"] = method.text 2571 if side: 2572 kwargs["side"] = side.text 2573 if kind: 2574 kwargs["kind"] = kind.text 2575 if hint: 2576 kwargs["hint"] = hint 2577 2578 if self._match(TokenType.ON): 2579 kwargs["on"] = self._parse_conjunction() 2580 elif self._match(TokenType.USING): 2581 kwargs["using"] = self._parse_wrapped_id_vars() 2582 elif not (kind and kind.token_type == TokenType.CROSS): 2583 index = self._index 2584 join = self._parse_join() 2585 2586 if join and self._match(TokenType.ON): 2587 kwargs["on"] = self._parse_conjunction() 2588 elif join and self._match(TokenType.USING): 2589 kwargs["using"] = self._parse_wrapped_id_vars() 2590 else: 2591 join = None 2592 self._retreat(index) 2593 2594 kwargs["this"].set("joins", [join] if join else None) 2595 2596 comments = [c for token in (method, side, kind) if token for c in token.comments] 2597 return self.expression(exp.Join, comments=comments, **kwargs) 2598 2599 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2600 this = self._parse_conjunction() 2601 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2602 return this 2603 2604 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2605 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2606 2607 return this 2608 2609 def _parse_index( 2610 self, 2611 index: t.Optional[exp.Expression] = None, 2612 ) -> t.Optional[exp.Index]: 2613 if index: 2614 unique = None 2615 primary = None 2616 amp = None 2617 2618 self._match(TokenType.ON) 2619 self._match(TokenType.TABLE) # hive 2620 table = self._parse_table_parts(schema=True) 2621 else: 2622 unique = self._match(TokenType.UNIQUE) 2623 primary = self._match_text_seq("PRIMARY") 2624 amp = self._match_text_seq("AMP") 2625 2626 if not self._match(TokenType.INDEX): 2627 return None 2628 2629 index = self._parse_id_var() 2630 table = None 2631 2632 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2633 2634 if self._match(TokenType.L_PAREN, advance=False): 2635 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2636 else: 2637 columns = None 2638 2639 return self.expression( 2640 exp.Index, 2641 this=index, 2642 table=table, 2643 using=using, 2644 columns=columns, 2645 unique=unique, 2646 primary=primary, 2647 amp=amp, 2648 partition_by=self._parse_partition_by(), 2649 where=self._parse_where(), 2650 ) 2651 2652 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2653 hints: t.List[exp.Expression] = [] 2654 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2655 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2656 hints.append( 2657 self.expression( 2658 exp.WithTableHint, 2659 expressions=self._parse_csv( 2660 lambda: self._parse_function() or self._parse_var(any_token=True) 2661 ), 2662 ) 2663 ) 2664 self._match_r_paren() 2665 else: 2666 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2667 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2668 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2669 2670 self._match_texts(("INDEX", "KEY")) 2671 if self._match(TokenType.FOR): 2672 hint.set("target", self._advance_any() and self._prev.text.upper()) 2673 2674 hint.set("expressions", self._parse_wrapped_id_vars()) 2675 hints.append(hint) 2676 2677 return hints or None 2678 2679 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2680 return ( 2681 (not schema and self._parse_function(optional_parens=False)) 2682 or self._parse_id_var(any_token=False) 2683 or self._parse_string_as_identifier() 2684 or self._parse_placeholder() 2685 ) 2686 2687 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2688 catalog = None 2689 db = None 2690 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2691 2692 while self._match(TokenType.DOT): 2693 if catalog: 2694 # This allows nesting the table in arbitrarily many dot expressions if needed 2695 table = self.expression( 2696 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2697 ) 2698 else: 2699 catalog = db 2700 db = table 2701 table = self._parse_table_part(schema=schema) or "" 2702 2703 if not table: 2704 self.raise_error(f"Expected table name but got {self._curr}") 2705 2706 return self.expression( 2707 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2708 ) 2709 2710 def _parse_table( 2711 self, 2712 schema: bool = False, 2713 joins: bool = False, 2714 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2715 parse_bracket: bool = False, 2716 ) -> t.Optional[exp.Expression]: 2717 lateral = self._parse_lateral() 2718 if lateral: 2719 return lateral 2720 2721 unnest = self._parse_unnest() 2722 if unnest: 2723 return unnest 2724 2725 values = self._parse_derived_table_values() 2726 if values: 2727 return values 2728 2729 subquery = self._parse_select(table=True) 2730 if subquery: 2731 if not subquery.args.get("pivots"): 2732 subquery.set("pivots", self._parse_pivots()) 2733 return subquery 2734 2735 bracket = parse_bracket and self._parse_bracket(None) 2736 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2737 this = t.cast( 2738 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2739 ) 2740 2741 if schema: 2742 return self._parse_schema(this=this) 2743 2744 version = self._parse_version() 2745 2746 if version: 2747 this.set("version", version) 2748 2749 if self.ALIAS_POST_TABLESAMPLE: 2750 table_sample = self._parse_table_sample() 2751 2752 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2753 if alias: 2754 this.set("alias", alias) 2755 2756 if self._match_text_seq("AT"): 2757 this.set("index", self._parse_id_var()) 2758 2759 this.set("hints", self._parse_table_hints()) 2760 2761 if not this.args.get("pivots"): 2762 this.set("pivots", self._parse_pivots()) 2763 2764 if not self.ALIAS_POST_TABLESAMPLE: 2765 table_sample = self._parse_table_sample() 2766 2767 if table_sample: 2768 table_sample.set("this", this) 2769 this = table_sample 2770 2771 if joins: 2772 for join in iter(self._parse_join, None): 2773 this.append("joins", join) 2774 2775 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2776 this.set("ordinality", True) 2777 this.set("alias", self._parse_table_alias()) 2778 2779 return this 2780 2781 def _parse_version(self) -> t.Optional[exp.Version]: 2782 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2783 this = "TIMESTAMP" 2784 elif self._match(TokenType.VERSION_SNAPSHOT): 2785 this = "VERSION" 2786 else: 2787 return None 2788 2789 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2790 kind = self._prev.text.upper() 2791 start = self._parse_bitwise() 2792 self._match_texts(("TO", "AND")) 2793 end = self._parse_bitwise() 2794 expression: t.Optional[exp.Expression] = self.expression( 2795 exp.Tuple, expressions=[start, end] 2796 ) 2797 elif self._match_text_seq("CONTAINED", "IN"): 2798 kind = "CONTAINED IN" 2799 expression = self.expression( 2800 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2801 ) 2802 elif self._match(TokenType.ALL): 2803 kind = "ALL" 2804 expression = None 2805 else: 2806 self._match_text_seq("AS", "OF") 2807 kind = "AS OF" 2808 expression = self._parse_type() 2809 2810 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2811 2812 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2813 if not self._match(TokenType.UNNEST): 2814 return None 2815 2816 expressions = self._parse_wrapped_csv(self._parse_equality) 2817 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2818 2819 alias = self._parse_table_alias() if with_alias else None 2820 2821 if alias: 2822 if self.UNNEST_COLUMN_ONLY: 2823 if alias.args.get("columns"): 2824 self.raise_error("Unexpected extra column alias in unnest.") 2825 2826 alias.set("columns", [alias.this]) 2827 alias.set("this", None) 2828 2829 columns = alias.args.get("columns") or [] 2830 if offset and len(expressions) < len(columns): 2831 offset = columns.pop() 2832 2833 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2834 self._match(TokenType.ALIAS) 2835 offset = self._parse_id_var( 2836 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2837 ) or exp.to_identifier("offset") 2838 2839 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2840 2841 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2842 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2843 if not is_derived and not self._match(TokenType.VALUES): 2844 return None 2845 2846 expressions = self._parse_csv(self._parse_value) 2847 alias = self._parse_table_alias() 2848 2849 if is_derived: 2850 self._match_r_paren() 2851 2852 return self.expression( 2853 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2854 ) 2855 2856 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2857 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2858 as_modifier and self._match_text_seq("USING", "SAMPLE") 2859 ): 2860 return None 2861 2862 bucket_numerator = None 2863 bucket_denominator = None 2864 bucket_field = None 2865 percent = None 2866 rows = None 2867 size = None 2868 seed = None 2869 2870 kind = ( 2871 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2872 ) 2873 method = self._parse_var(tokens=(TokenType.ROW,)) 2874 2875 matched_l_paren = self._match(TokenType.L_PAREN) 2876 2877 if self.TABLESAMPLE_CSV: 2878 num = None 2879 expressions = self._parse_csv(self._parse_primary) 2880 else: 2881 expressions = None 2882 num = ( 2883 self._parse_factor() 2884 if self._match(TokenType.NUMBER, advance=False) 2885 else self._parse_primary() or self._parse_placeholder() 2886 ) 2887 2888 if self._match_text_seq("BUCKET"): 2889 bucket_numerator = self._parse_number() 2890 self._match_text_seq("OUT", "OF") 2891 bucket_denominator = bucket_denominator = self._parse_number() 2892 self._match(TokenType.ON) 2893 bucket_field = self._parse_field() 2894 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2895 percent = num 2896 elif self._match(TokenType.ROWS): 2897 rows = num 2898 elif num: 2899 size = num 2900 2901 if matched_l_paren: 2902 self._match_r_paren() 2903 2904 if self._match(TokenType.L_PAREN): 2905 method = self._parse_var() 2906 seed = self._match(TokenType.COMMA) and self._parse_number() 2907 self._match_r_paren() 2908 elif self._match_texts(("SEED", "REPEATABLE")): 2909 seed = self._parse_wrapped(self._parse_number) 2910 2911 return self.expression( 2912 exp.TableSample, 2913 expressions=expressions, 2914 method=method, 2915 bucket_numerator=bucket_numerator, 2916 bucket_denominator=bucket_denominator, 2917 bucket_field=bucket_field, 2918 percent=percent, 2919 rows=rows, 2920 size=size, 2921 seed=seed, 2922 kind=kind, 2923 ) 2924 2925 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2926 return list(iter(self._parse_pivot, None)) or None 2927 2928 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2929 return list(iter(self._parse_join, None)) or None 2930 2931 # https://duckdb.org/docs/sql/statements/pivot 2932 def _parse_simplified_pivot(self) -> exp.Pivot: 2933 def _parse_on() -> t.Optional[exp.Expression]: 2934 this = self._parse_bitwise() 2935 return self._parse_in(this) if self._match(TokenType.IN) else this 2936 2937 this = self._parse_table() 2938 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2939 using = self._match(TokenType.USING) and self._parse_csv( 2940 lambda: self._parse_alias(self._parse_function()) 2941 ) 2942 group = self._parse_group() 2943 return self.expression( 2944 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2945 ) 2946 2947 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2948 index = self._index 2949 include_nulls = None 2950 2951 if self._match(TokenType.PIVOT): 2952 unpivot = False 2953 elif self._match(TokenType.UNPIVOT): 2954 unpivot = True 2955 2956 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2957 if self._match_text_seq("INCLUDE", "NULLS"): 2958 include_nulls = True 2959 elif self._match_text_seq("EXCLUDE", "NULLS"): 2960 include_nulls = False 2961 else: 2962 return None 2963 2964 expressions = [] 2965 field = None 2966 2967 if not self._match(TokenType.L_PAREN): 2968 self._retreat(index) 2969 return None 2970 2971 if unpivot: 2972 expressions = self._parse_csv(self._parse_column) 2973 else: 2974 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2975 2976 if not expressions: 2977 self.raise_error("Failed to parse PIVOT's aggregation list") 2978 2979 if not self._match(TokenType.FOR): 2980 self.raise_error("Expecting FOR") 2981 2982 value = self._parse_column() 2983 2984 if not self._match(TokenType.IN): 2985 self.raise_error("Expecting IN") 2986 2987 field = self._parse_in(value, alias=True) 2988 2989 self._match_r_paren() 2990 2991 pivot = self.expression( 2992 exp.Pivot, 2993 expressions=expressions, 2994 field=field, 2995 unpivot=unpivot, 2996 include_nulls=include_nulls, 2997 ) 2998 2999 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3000 pivot.set("alias", self._parse_table_alias()) 3001 3002 if not unpivot: 3003 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3004 3005 columns: t.List[exp.Expression] = [] 3006 for fld in pivot.args["field"].expressions: 3007 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3008 for name in names: 3009 if self.PREFIXED_PIVOT_COLUMNS: 3010 name = f"{name}_{field_name}" if name else field_name 3011 else: 3012 name = f"{field_name}_{name}" if name else field_name 3013 3014 columns.append(exp.to_identifier(name)) 3015 3016 pivot.set("columns", columns) 3017 3018 return pivot 3019 3020 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3021 return [agg.alias for agg in aggregations] 3022 3023 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3024 if not skip_where_token and not self._match(TokenType.WHERE): 3025 return None 3026 3027 return self.expression( 3028 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3029 ) 3030 3031 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3032 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3033 return None 3034 3035 elements = defaultdict(list) 3036 3037 if self._match(TokenType.ALL): 3038 return self.expression(exp.Group, all=True) 3039 3040 while True: 3041 expressions = self._parse_csv(self._parse_conjunction) 3042 if expressions: 3043 elements["expressions"].extend(expressions) 3044 3045 grouping_sets = self._parse_grouping_sets() 3046 if grouping_sets: 3047 elements["grouping_sets"].extend(grouping_sets) 3048 3049 rollup = None 3050 cube = None 3051 totals = None 3052 3053 index = self._index 3054 with_ = self._match(TokenType.WITH) 3055 if self._match(TokenType.ROLLUP): 3056 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3057 elements["rollup"].extend(ensure_list(rollup)) 3058 3059 if self._match(TokenType.CUBE): 3060 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3061 elements["cube"].extend(ensure_list(cube)) 3062 3063 if self._match_text_seq("TOTALS"): 3064 totals = True 3065 elements["totals"] = True # type: ignore 3066 3067 if not (grouping_sets or rollup or cube or totals): 3068 if with_: 3069 self._retreat(index) 3070 break 3071 3072 return self.expression(exp.Group, **elements) # type: ignore 3073 3074 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3075 if not self._match(TokenType.GROUPING_SETS): 3076 return None 3077 3078 return self._parse_wrapped_csv(self._parse_grouping_set) 3079 3080 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3081 if self._match(TokenType.L_PAREN): 3082 grouping_set = self._parse_csv(self._parse_column) 3083 self._match_r_paren() 3084 return self.expression(exp.Tuple, expressions=grouping_set) 3085 3086 return self._parse_column() 3087 3088 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3089 if not skip_having_token and not self._match(TokenType.HAVING): 3090 return None 3091 return self.expression(exp.Having, this=self._parse_conjunction()) 3092 3093 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3094 if not self._match(TokenType.QUALIFY): 3095 return None 3096 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3097 3098 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3099 if skip_start_token: 3100 start = None 3101 elif self._match(TokenType.START_WITH): 3102 start = self._parse_conjunction() 3103 else: 3104 return None 3105 3106 self._match(TokenType.CONNECT_BY) 3107 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3108 exp.Prior, this=self._parse_bitwise() 3109 ) 3110 connect = self._parse_conjunction() 3111 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3112 3113 if not start and self._match(TokenType.START_WITH): 3114 start = self._parse_conjunction() 3115 3116 return self.expression(exp.Connect, start=start, connect=connect) 3117 3118 def _parse_order( 3119 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3120 ) -> t.Optional[exp.Expression]: 3121 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3122 return this 3123 3124 return self.expression( 3125 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3126 ) 3127 3128 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3129 if not self._match(token): 3130 return None 3131 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3132 3133 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3134 this = parse_method() if parse_method else self._parse_conjunction() 3135 3136 asc = self._match(TokenType.ASC) 3137 desc = self._match(TokenType.DESC) or (asc and False) 3138 3139 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3140 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3141 3142 nulls_first = is_nulls_first or False 3143 explicitly_null_ordered = is_nulls_first or is_nulls_last 3144 3145 if ( 3146 not explicitly_null_ordered 3147 and ( 3148 (not desc and self.NULL_ORDERING == "nulls_are_small") 3149 or (desc and self.NULL_ORDERING != "nulls_are_small") 3150 ) 3151 and self.NULL_ORDERING != "nulls_are_last" 3152 ): 3153 nulls_first = True 3154 3155 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3156 3157 def _parse_limit( 3158 self, this: t.Optional[exp.Expression] = None, top: bool = False 3159 ) -> t.Optional[exp.Expression]: 3160 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3161 comments = self._prev_comments 3162 if top: 3163 limit_paren = self._match(TokenType.L_PAREN) 3164 expression = self._parse_term() if limit_paren else self._parse_number() 3165 3166 if limit_paren: 3167 self._match_r_paren() 3168 else: 3169 expression = self._parse_term() 3170 3171 if self._match(TokenType.COMMA): 3172 offset = expression 3173 expression = self._parse_term() 3174 else: 3175 offset = None 3176 3177 limit_exp = self.expression( 3178 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3179 ) 3180 3181 return limit_exp 3182 3183 if self._match(TokenType.FETCH): 3184 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3185 direction = self._prev.text if direction else "FIRST" 3186 3187 count = self._parse_field(tokens=self.FETCH_TOKENS) 3188 percent = self._match(TokenType.PERCENT) 3189 3190 self._match_set((TokenType.ROW, TokenType.ROWS)) 3191 3192 only = self._match_text_seq("ONLY") 3193 with_ties = self._match_text_seq("WITH", "TIES") 3194 3195 if only and with_ties: 3196 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3197 3198 return self.expression( 3199 exp.Fetch, 3200 direction=direction, 3201 count=count, 3202 percent=percent, 3203 with_ties=with_ties, 3204 ) 3205 3206 return this 3207 3208 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3209 if not self._match(TokenType.OFFSET): 3210 return this 3211 3212 count = self._parse_term() 3213 self._match_set((TokenType.ROW, TokenType.ROWS)) 3214 return self.expression(exp.Offset, this=this, expression=count) 3215 3216 def _parse_locks(self) -> t.List[exp.Lock]: 3217 locks = [] 3218 while True: 3219 if self._match_text_seq("FOR", "UPDATE"): 3220 update = True 3221 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3222 "LOCK", "IN", "SHARE", "MODE" 3223 ): 3224 update = False 3225 else: 3226 break 3227 3228 expressions = None 3229 if self._match_text_seq("OF"): 3230 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3231 3232 wait: t.Optional[bool | exp.Expression] = None 3233 if self._match_text_seq("NOWAIT"): 3234 wait = True 3235 elif self._match_text_seq("WAIT"): 3236 wait = self._parse_primary() 3237 elif self._match_text_seq("SKIP", "LOCKED"): 3238 wait = False 3239 3240 locks.append( 3241 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3242 ) 3243 3244 return locks 3245 3246 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3247 if not self._match_set(self.SET_OPERATIONS): 3248 return this 3249 3250 token_type = self._prev.token_type 3251 3252 if token_type == TokenType.UNION: 3253 expression = exp.Union 3254 elif token_type == TokenType.EXCEPT: 3255 expression = exp.Except 3256 else: 3257 expression = exp.Intersect 3258 3259 return self.expression( 3260 expression, 3261 comments=self._prev.comments, 3262 this=this, 3263 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3264 by_name=self._match_text_seq("BY", "NAME"), 3265 expression=self._parse_set_operations(self._parse_select(nested=True)), 3266 ) 3267 3268 def _parse_expression(self) -> t.Optional[exp.Expression]: 3269 return self._parse_alias(self._parse_conjunction()) 3270 3271 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3272 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3273 3274 def _parse_equality(self) -> t.Optional[exp.Expression]: 3275 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3276 3277 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3278 return self._parse_tokens(self._parse_range, self.COMPARISON) 3279 3280 def _parse_range(self) -> t.Optional[exp.Expression]: 3281 this = self._parse_bitwise() 3282 negate = self._match(TokenType.NOT) 3283 3284 if self._match_set(self.RANGE_PARSERS): 3285 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3286 if not expression: 3287 return this 3288 3289 this = expression 3290 elif self._match(TokenType.ISNULL): 3291 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3292 3293 # Postgres supports ISNULL and NOTNULL for conditions. 3294 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3295 if self._match(TokenType.NOTNULL): 3296 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3297 this = self.expression(exp.Not, this=this) 3298 3299 if negate: 3300 this = self.expression(exp.Not, this=this) 3301 3302 if self._match(TokenType.IS): 3303 this = self._parse_is(this) 3304 3305 return this 3306 3307 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3308 index = self._index - 1 3309 negate = self._match(TokenType.NOT) 3310 3311 if self._match_text_seq("DISTINCT", "FROM"): 3312 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3313 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3314 3315 expression = self._parse_null() or self._parse_boolean() 3316 if not expression: 3317 self._retreat(index) 3318 return None 3319 3320 this = self.expression(exp.Is, this=this, expression=expression) 3321 return self.expression(exp.Not, this=this) if negate else this 3322 3323 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3324 unnest = self._parse_unnest(with_alias=False) 3325 if unnest: 3326 this = self.expression(exp.In, this=this, unnest=unnest) 3327 elif self._match(TokenType.L_PAREN): 3328 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3329 3330 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3331 this = self.expression(exp.In, this=this, query=expressions[0]) 3332 else: 3333 this = self.expression(exp.In, this=this, expressions=expressions) 3334 3335 self._match_r_paren(this) 3336 else: 3337 this = self.expression(exp.In, this=this, field=self._parse_field()) 3338 3339 return this 3340 3341 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3342 low = self._parse_bitwise() 3343 self._match(TokenType.AND) 3344 high = self._parse_bitwise() 3345 return self.expression(exp.Between, this=this, low=low, high=high) 3346 3347 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3348 if not self._match(TokenType.ESCAPE): 3349 return this 3350 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3351 3352 def _parse_interval(self) -> t.Optional[exp.Interval]: 3353 index = self._index 3354 3355 if not self._match(TokenType.INTERVAL): 3356 return None 3357 3358 if self._match(TokenType.STRING, advance=False): 3359 this = self._parse_primary() 3360 else: 3361 this = self._parse_term() 3362 3363 if not this: 3364 self._retreat(index) 3365 return None 3366 3367 unit = self._parse_function() or self._parse_var(any_token=True) 3368 3369 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3370 # each INTERVAL expression into this canonical form so it's easy to transpile 3371 if this and this.is_number: 3372 this = exp.Literal.string(this.name) 3373 elif this and this.is_string: 3374 parts = this.name.split() 3375 3376 if len(parts) == 2: 3377 if unit: 3378 # This is not actually a unit, it's something else (e.g. a "window side") 3379 unit = None 3380 self._retreat(self._index - 1) 3381 3382 this = exp.Literal.string(parts[0]) 3383 unit = self.expression(exp.Var, this=parts[1]) 3384 3385 return self.expression(exp.Interval, this=this, unit=unit) 3386 3387 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3388 this = self._parse_term() 3389 3390 while True: 3391 if self._match_set(self.BITWISE): 3392 this = self.expression( 3393 self.BITWISE[self._prev.token_type], 3394 this=this, 3395 expression=self._parse_term(), 3396 ) 3397 elif self.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3398 this = self.expression( 3399 exp.DPipe, 3400 this=this, 3401 expression=self._parse_term(), 3402 safe=not self.STRICT_STRING_CONCAT, 3403 ) 3404 elif self._match(TokenType.DQMARK): 3405 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3406 elif self._match_pair(TokenType.LT, TokenType.LT): 3407 this = self.expression( 3408 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3409 ) 3410 elif self._match_pair(TokenType.GT, TokenType.GT): 3411 this = self.expression( 3412 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3413 ) 3414 else: 3415 break 3416 3417 return this 3418 3419 def _parse_term(self) -> t.Optional[exp.Expression]: 3420 return self._parse_tokens(self._parse_factor, self.TERM) 3421 3422 def _parse_factor(self) -> t.Optional[exp.Expression]: 3423 if self.EXPONENT: 3424 factor = self._parse_tokens(self._parse_exponent, self.FACTOR) 3425 else: 3426 factor = self._parse_tokens(self._parse_unary, self.FACTOR) 3427 if isinstance(factor, exp.Div): 3428 factor.args["typed"] = self.TYPED_DIVISION 3429 factor.args["safe"] = self.SAFE_DIVISION 3430 return factor 3431 3432 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3433 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3434 3435 def _parse_unary(self) -> t.Optional[exp.Expression]: 3436 if self._match_set(self.UNARY_PARSERS): 3437 return self.UNARY_PARSERS[self._prev.token_type](self) 3438 return self._parse_at_time_zone(self._parse_type()) 3439 3440 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3441 interval = parse_interval and self._parse_interval() 3442 if interval: 3443 return interval 3444 3445 index = self._index 3446 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3447 this = self._parse_column() 3448 3449 if data_type: 3450 if isinstance(this, exp.Literal): 3451 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3452 if parser: 3453 return parser(self, this, data_type) 3454 return self.expression(exp.Cast, this=this, to=data_type) 3455 if not data_type.expressions: 3456 self._retreat(index) 3457 return self._parse_column() 3458 return self._parse_column_ops(data_type) 3459 3460 return this and self._parse_column_ops(this) 3461 3462 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3463 this = self._parse_type() 3464 if not this: 3465 return None 3466 3467 return self.expression( 3468 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3469 ) 3470 3471 def _parse_types( 3472 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3473 ) -> t.Optional[exp.Expression]: 3474 index = self._index 3475 3476 prefix = self._match_text_seq("SYSUDTLIB", ".") 3477 3478 if not self._match_set(self.TYPE_TOKENS): 3479 identifier = allow_identifiers and self._parse_id_var( 3480 any_token=False, tokens=(TokenType.VAR,) 3481 ) 3482 3483 if identifier: 3484 tokens = self._tokenizer.tokenize(identifier.name) 3485 3486 if len(tokens) != 1: 3487 self.raise_error("Unexpected identifier", self._prev) 3488 3489 if tokens[0].token_type in self.TYPE_TOKENS: 3490 self._prev = tokens[0] 3491 elif self.SUPPORTS_USER_DEFINED_TYPES: 3492 type_name = identifier.name 3493 3494 while self._match(TokenType.DOT): 3495 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3496 3497 return exp.DataType.build(type_name, udt=True) 3498 else: 3499 return None 3500 else: 3501 return None 3502 3503 type_token = self._prev.token_type 3504 3505 if type_token == TokenType.PSEUDO_TYPE: 3506 return self.expression(exp.PseudoType, this=self._prev.text) 3507 3508 if type_token == TokenType.OBJECT_IDENTIFIER: 3509 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3510 3511 nested = type_token in self.NESTED_TYPE_TOKENS 3512 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3513 expressions = None 3514 maybe_func = False 3515 3516 if self._match(TokenType.L_PAREN): 3517 if is_struct: 3518 expressions = self._parse_csv(self._parse_struct_types) 3519 elif nested: 3520 expressions = self._parse_csv( 3521 lambda: self._parse_types( 3522 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3523 ) 3524 ) 3525 elif type_token in self.ENUM_TYPE_TOKENS: 3526 expressions = self._parse_csv(self._parse_equality) 3527 else: 3528 expressions = self._parse_csv(self._parse_type_size) 3529 3530 if not expressions or not self._match(TokenType.R_PAREN): 3531 self._retreat(index) 3532 return None 3533 3534 maybe_func = True 3535 3536 this: t.Optional[exp.Expression] = None 3537 values: t.Optional[t.List[exp.Expression]] = None 3538 3539 if nested and self._match(TokenType.LT): 3540 if is_struct: 3541 expressions = self._parse_csv(self._parse_struct_types) 3542 else: 3543 expressions = self._parse_csv( 3544 lambda: self._parse_types( 3545 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3546 ) 3547 ) 3548 3549 if not self._match(TokenType.GT): 3550 self.raise_error("Expecting >") 3551 3552 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3553 values = self._parse_csv(self._parse_conjunction) 3554 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3555 3556 if type_token in self.TIMESTAMPS: 3557 if self._match_text_seq("WITH", "TIME", "ZONE"): 3558 maybe_func = False 3559 tz_type = ( 3560 exp.DataType.Type.TIMETZ 3561 if type_token in self.TIMES 3562 else exp.DataType.Type.TIMESTAMPTZ 3563 ) 3564 this = exp.DataType(this=tz_type, expressions=expressions) 3565 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3566 maybe_func = False 3567 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3568 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3569 maybe_func = False 3570 elif type_token == TokenType.INTERVAL: 3571 unit = self._parse_var() 3572 3573 if self._match_text_seq("TO"): 3574 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3575 else: 3576 span = None 3577 3578 if span or not unit: 3579 this = self.expression( 3580 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3581 ) 3582 else: 3583 this = self.expression(exp.Interval, unit=unit) 3584 3585 if maybe_func and check_func: 3586 index2 = self._index 3587 peek = self._parse_string() 3588 3589 if not peek: 3590 self._retreat(index) 3591 return None 3592 3593 self._retreat(index2) 3594 3595 if not this: 3596 if self._match_text_seq("UNSIGNED"): 3597 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3598 if not unsigned_type_token: 3599 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3600 3601 type_token = unsigned_type_token or type_token 3602 3603 this = exp.DataType( 3604 this=exp.DataType.Type[type_token.value], 3605 expressions=expressions, 3606 nested=nested, 3607 values=values, 3608 prefix=prefix, 3609 ) 3610 3611 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3612 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3613 3614 return this 3615 3616 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3617 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3618 self._match(TokenType.COLON) 3619 return self._parse_column_def(this) 3620 3621 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3622 if not self._match_text_seq("AT", "TIME", "ZONE"): 3623 return this 3624 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3625 3626 def _parse_column(self) -> t.Optional[exp.Expression]: 3627 this = self._parse_field() 3628 if isinstance(this, exp.Identifier): 3629 this = self.expression(exp.Column, this=this) 3630 elif not this: 3631 return self._parse_bracket(this) 3632 return self._parse_column_ops(this) 3633 3634 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3635 this = self._parse_bracket(this) 3636 3637 while self._match_set(self.COLUMN_OPERATORS): 3638 op_token = self._prev.token_type 3639 op = self.COLUMN_OPERATORS.get(op_token) 3640 3641 if op_token == TokenType.DCOLON: 3642 field = self._parse_types() 3643 if not field: 3644 self.raise_error("Expected type") 3645 elif op and self._curr: 3646 self._advance() 3647 value = self._prev.text 3648 field = ( 3649 exp.Literal.number(value) 3650 if self._prev.token_type == TokenType.NUMBER 3651 else exp.Literal.string(value) 3652 ) 3653 else: 3654 field = self._parse_field(anonymous_func=True, any_token=True) 3655 3656 if isinstance(field, exp.Func): 3657 # bigquery allows function calls like x.y.count(...) 3658 # SAFE.SUBSTR(...) 3659 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3660 this = self._replace_columns_with_dots(this) 3661 3662 if op: 3663 this = op(self, this, field) 3664 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3665 this = self.expression( 3666 exp.Column, 3667 this=field, 3668 table=this.this, 3669 db=this.args.get("table"), 3670 catalog=this.args.get("db"), 3671 ) 3672 else: 3673 this = self.expression(exp.Dot, this=this, expression=field) 3674 this = self._parse_bracket(this) 3675 return this 3676 3677 def _parse_primary(self) -> t.Optional[exp.Expression]: 3678 if self._match_set(self.PRIMARY_PARSERS): 3679 token_type = self._prev.token_type 3680 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3681 3682 if token_type == TokenType.STRING: 3683 expressions = [primary] 3684 while self._match(TokenType.STRING): 3685 expressions.append(exp.Literal.string(self._prev.text)) 3686 3687 if len(expressions) > 1: 3688 return self.expression(exp.Concat, expressions=expressions) 3689 3690 return primary 3691 3692 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3693 return exp.Literal.number(f"0.{self._prev.text}") 3694 3695 if self._match(TokenType.L_PAREN): 3696 comments = self._prev_comments 3697 query = self._parse_select() 3698 3699 if query: 3700 expressions = [query] 3701 else: 3702 expressions = self._parse_expressions() 3703 3704 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3705 3706 if isinstance(this, exp.Subqueryable): 3707 this = self._parse_set_operations( 3708 self._parse_subquery(this=this, parse_alias=False) 3709 ) 3710 elif len(expressions) > 1: 3711 this = self.expression(exp.Tuple, expressions=expressions) 3712 else: 3713 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3714 3715 if this: 3716 this.add_comments(comments) 3717 3718 self._match_r_paren(expression=this) 3719 return this 3720 3721 return None 3722 3723 def _parse_field( 3724 self, 3725 any_token: bool = False, 3726 tokens: t.Optional[t.Collection[TokenType]] = None, 3727 anonymous_func: bool = False, 3728 ) -> t.Optional[exp.Expression]: 3729 return ( 3730 self._parse_primary() 3731 or self._parse_function(anonymous=anonymous_func) 3732 or self._parse_id_var(any_token=any_token, tokens=tokens) 3733 ) 3734 3735 def _parse_function( 3736 self, 3737 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3738 anonymous: bool = False, 3739 optional_parens: bool = True, 3740 ) -> t.Optional[exp.Expression]: 3741 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3742 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3743 fn_syntax = False 3744 if ( 3745 self._match(TokenType.L_BRACE, advance=False) 3746 and self._next 3747 and self._next.text.upper() == "FN" 3748 ): 3749 self._advance(2) 3750 fn_syntax = True 3751 3752 func = self._parse_function_call( 3753 functions=functions, anonymous=anonymous, optional_parens=optional_parens 3754 ) 3755 3756 if fn_syntax: 3757 self._match(TokenType.R_BRACE) 3758 3759 return func 3760 3761 def _parse_function_call( 3762 self, 3763 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3764 anonymous: bool = False, 3765 optional_parens: bool = True, 3766 ) -> t.Optional[exp.Expression]: 3767 if not self._curr: 3768 return None 3769 3770 comments = self._curr.comments 3771 token_type = self._curr.token_type 3772 this = self._curr.text 3773 upper = this.upper() 3774 3775 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3776 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3777 self._advance() 3778 return parser(self) 3779 3780 if not self._next or self._next.token_type != TokenType.L_PAREN: 3781 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3782 self._advance() 3783 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3784 3785 return None 3786 3787 if token_type not in self.FUNC_TOKENS: 3788 return None 3789 3790 self._advance(2) 3791 3792 parser = self.FUNCTION_PARSERS.get(upper) 3793 if parser and not anonymous: 3794 this = parser(self) 3795 else: 3796 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3797 3798 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3799 this = self.expression(subquery_predicate, this=self._parse_select()) 3800 self._match_r_paren() 3801 return this 3802 3803 if functions is None: 3804 functions = self.FUNCTIONS 3805 3806 function = functions.get(upper) 3807 3808 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3809 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3810 3811 if function and not anonymous: 3812 func = self.validate_expression(function(args), args) 3813 if not self.NORMALIZE_FUNCTIONS: 3814 func.meta["name"] = this 3815 this = func 3816 else: 3817 this = self.expression(exp.Anonymous, this=this, expressions=args) 3818 3819 if isinstance(this, exp.Expression): 3820 this.add_comments(comments) 3821 3822 self._match_r_paren(this) 3823 return self._parse_window(this) 3824 3825 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3826 return self._parse_column_def(self._parse_id_var()) 3827 3828 def _parse_user_defined_function( 3829 self, kind: t.Optional[TokenType] = None 3830 ) -> t.Optional[exp.Expression]: 3831 this = self._parse_id_var() 3832 3833 while self._match(TokenType.DOT): 3834 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3835 3836 if not self._match(TokenType.L_PAREN): 3837 return this 3838 3839 expressions = self._parse_csv(self._parse_function_parameter) 3840 self._match_r_paren() 3841 return self.expression( 3842 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3843 ) 3844 3845 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3846 literal = self._parse_primary() 3847 if literal: 3848 return self.expression(exp.Introducer, this=token.text, expression=literal) 3849 3850 return self.expression(exp.Identifier, this=token.text) 3851 3852 def _parse_session_parameter(self) -> exp.SessionParameter: 3853 kind = None 3854 this = self._parse_id_var() or self._parse_primary() 3855 3856 if this and self._match(TokenType.DOT): 3857 kind = this.name 3858 this = self._parse_var() or self._parse_primary() 3859 3860 return self.expression(exp.SessionParameter, this=this, kind=kind) 3861 3862 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3863 index = self._index 3864 3865 if self._match(TokenType.L_PAREN): 3866 expressions = t.cast( 3867 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3868 ) 3869 3870 if not self._match(TokenType.R_PAREN): 3871 self._retreat(index) 3872 else: 3873 expressions = [self._parse_id_var()] 3874 3875 if self._match_set(self.LAMBDAS): 3876 return self.LAMBDAS[self._prev.token_type](self, expressions) 3877 3878 self._retreat(index) 3879 3880 this: t.Optional[exp.Expression] 3881 3882 if self._match(TokenType.DISTINCT): 3883 this = self.expression( 3884 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3885 ) 3886 else: 3887 this = self._parse_select_or_expression(alias=alias) 3888 3889 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3890 3891 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3892 index = self._index 3893 3894 if not self.errors: 3895 try: 3896 if self._parse_select(nested=True): 3897 return this 3898 except ParseError: 3899 pass 3900 finally: 3901 self.errors.clear() 3902 self._retreat(index) 3903 3904 if not self._match(TokenType.L_PAREN): 3905 return this 3906 3907 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3908 3909 self._match_r_paren() 3910 return self.expression(exp.Schema, this=this, expressions=args) 3911 3912 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3913 return self._parse_column_def(self._parse_field(any_token=True)) 3914 3915 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3916 # column defs are not really columns, they're identifiers 3917 if isinstance(this, exp.Column): 3918 this = this.this 3919 3920 kind = self._parse_types(schema=True) 3921 3922 if self._match_text_seq("FOR", "ORDINALITY"): 3923 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3924 3925 constraints: t.List[exp.Expression] = [] 3926 3927 if not kind and self._match(TokenType.ALIAS): 3928 constraints.append( 3929 self.expression( 3930 exp.ComputedColumnConstraint, 3931 this=self._parse_conjunction(), 3932 persisted=self._match_text_seq("PERSISTED"), 3933 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3934 ) 3935 ) 3936 3937 while True: 3938 constraint = self._parse_column_constraint() 3939 if not constraint: 3940 break 3941 constraints.append(constraint) 3942 3943 if not kind and not constraints: 3944 return this 3945 3946 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3947 3948 def _parse_auto_increment( 3949 self, 3950 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3951 start = None 3952 increment = None 3953 3954 if self._match(TokenType.L_PAREN, advance=False): 3955 args = self._parse_wrapped_csv(self._parse_bitwise) 3956 start = seq_get(args, 0) 3957 increment = seq_get(args, 1) 3958 elif self._match_text_seq("START"): 3959 start = self._parse_bitwise() 3960 self._match_text_seq("INCREMENT") 3961 increment = self._parse_bitwise() 3962 3963 if start and increment: 3964 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3965 3966 return exp.AutoIncrementColumnConstraint() 3967 3968 def _parse_compress(self) -> exp.CompressColumnConstraint: 3969 if self._match(TokenType.L_PAREN, advance=False): 3970 return self.expression( 3971 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3972 ) 3973 3974 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3975 3976 def _parse_generated_as_identity( 3977 self, 3978 ) -> ( 3979 exp.GeneratedAsIdentityColumnConstraint 3980 | exp.ComputedColumnConstraint 3981 | exp.GeneratedAsRowColumnConstraint 3982 ): 3983 if self._match_text_seq("BY", "DEFAULT"): 3984 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3985 this = self.expression( 3986 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3987 ) 3988 else: 3989 self._match_text_seq("ALWAYS") 3990 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3991 3992 self._match(TokenType.ALIAS) 3993 3994 if self._match_text_seq("ROW"): 3995 start = self._match_text_seq("START") 3996 if not start: 3997 self._match(TokenType.END) 3998 hidden = self._match_text_seq("HIDDEN") 3999 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4000 4001 identity = self._match_text_seq("IDENTITY") 4002 4003 if self._match(TokenType.L_PAREN): 4004 if self._match(TokenType.START_WITH): 4005 this.set("start", self._parse_bitwise()) 4006 if self._match_text_seq("INCREMENT", "BY"): 4007 this.set("increment", self._parse_bitwise()) 4008 if self._match_text_seq("MINVALUE"): 4009 this.set("minvalue", self._parse_bitwise()) 4010 if self._match_text_seq("MAXVALUE"): 4011 this.set("maxvalue", self._parse_bitwise()) 4012 4013 if self._match_text_seq("CYCLE"): 4014 this.set("cycle", True) 4015 elif self._match_text_seq("NO", "CYCLE"): 4016 this.set("cycle", False) 4017 4018 if not identity: 4019 this.set("expression", self._parse_bitwise()) 4020 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4021 args = self._parse_csv(self._parse_bitwise) 4022 this.set("start", seq_get(args, 0)) 4023 this.set("increment", seq_get(args, 1)) 4024 4025 self._match_r_paren() 4026 4027 return this 4028 4029 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4030 self._match_text_seq("LENGTH") 4031 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4032 4033 def _parse_not_constraint( 4034 self, 4035 ) -> t.Optional[exp.Expression]: 4036 if self._match_text_seq("NULL"): 4037 return self.expression(exp.NotNullColumnConstraint) 4038 if self._match_text_seq("CASESPECIFIC"): 4039 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4040 if self._match_text_seq("FOR", "REPLICATION"): 4041 return self.expression(exp.NotForReplicationColumnConstraint) 4042 return None 4043 4044 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4045 if self._match(TokenType.CONSTRAINT): 4046 this = self._parse_id_var() 4047 else: 4048 this = None 4049 4050 if self._match_texts(self.CONSTRAINT_PARSERS): 4051 return self.expression( 4052 exp.ColumnConstraint, 4053 this=this, 4054 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4055 ) 4056 4057 return this 4058 4059 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4060 if not self._match(TokenType.CONSTRAINT): 4061 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4062 4063 this = self._parse_id_var() 4064 expressions = [] 4065 4066 while True: 4067 constraint = self._parse_unnamed_constraint() or self._parse_function() 4068 if not constraint: 4069 break 4070 expressions.append(constraint) 4071 4072 return self.expression(exp.Constraint, this=this, expressions=expressions) 4073 4074 def _parse_unnamed_constraint( 4075 self, constraints: t.Optional[t.Collection[str]] = None 4076 ) -> t.Optional[exp.Expression]: 4077 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4078 constraints or self.CONSTRAINT_PARSERS 4079 ): 4080 return None 4081 4082 constraint = self._prev.text.upper() 4083 if constraint not in self.CONSTRAINT_PARSERS: 4084 self.raise_error(f"No parser found for schema constraint {constraint}.") 4085 4086 return self.CONSTRAINT_PARSERS[constraint](self) 4087 4088 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4089 self._match_text_seq("KEY") 4090 return self.expression( 4091 exp.UniqueColumnConstraint, 4092 this=self._parse_schema(self._parse_id_var(any_token=False)), 4093 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4094 ) 4095 4096 def _parse_key_constraint_options(self) -> t.List[str]: 4097 options = [] 4098 while True: 4099 if not self._curr: 4100 break 4101 4102 if self._match(TokenType.ON): 4103 action = None 4104 on = self._advance_any() and self._prev.text 4105 4106 if self._match_text_seq("NO", "ACTION"): 4107 action = "NO ACTION" 4108 elif self._match_text_seq("CASCADE"): 4109 action = "CASCADE" 4110 elif self._match_text_seq("RESTRICT"): 4111 action = "RESTRICT" 4112 elif self._match_pair(TokenType.SET, TokenType.NULL): 4113 action = "SET NULL" 4114 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4115 action = "SET DEFAULT" 4116 else: 4117 self.raise_error("Invalid key constraint") 4118 4119 options.append(f"ON {on} {action}") 4120 elif self._match_text_seq("NOT", "ENFORCED"): 4121 options.append("NOT ENFORCED") 4122 elif self._match_text_seq("DEFERRABLE"): 4123 options.append("DEFERRABLE") 4124 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4125 options.append("INITIALLY DEFERRED") 4126 elif self._match_text_seq("NORELY"): 4127 options.append("NORELY") 4128 elif self._match_text_seq("MATCH", "FULL"): 4129 options.append("MATCH FULL") 4130 else: 4131 break 4132 4133 return options 4134 4135 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4136 if match and not self._match(TokenType.REFERENCES): 4137 return None 4138 4139 expressions = None 4140 this = self._parse_table(schema=True) 4141 options = self._parse_key_constraint_options() 4142 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4143 4144 def _parse_foreign_key(self) -> exp.ForeignKey: 4145 expressions = self._parse_wrapped_id_vars() 4146 reference = self._parse_references() 4147 options = {} 4148 4149 while self._match(TokenType.ON): 4150 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4151 self.raise_error("Expected DELETE or UPDATE") 4152 4153 kind = self._prev.text.lower() 4154 4155 if self._match_text_seq("NO", "ACTION"): 4156 action = "NO ACTION" 4157 elif self._match(TokenType.SET): 4158 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4159 action = "SET " + self._prev.text.upper() 4160 else: 4161 self._advance() 4162 action = self._prev.text.upper() 4163 4164 options[kind] = action 4165 4166 return self.expression( 4167 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4168 ) 4169 4170 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4171 return self._parse_field() 4172 4173 def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint: 4174 self._match(TokenType.TIMESTAMP_SNAPSHOT) 4175 4176 id_vars = self._parse_wrapped_id_vars() 4177 return self.expression( 4178 exp.PeriodForSystemTimeConstraint, 4179 this=seq_get(id_vars, 0), 4180 expression=seq_get(id_vars, 1), 4181 ) 4182 4183 def _parse_primary_key( 4184 self, wrapped_optional: bool = False, in_props: bool = False 4185 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4186 desc = ( 4187 self._match_set((TokenType.ASC, TokenType.DESC)) 4188 and self._prev.token_type == TokenType.DESC 4189 ) 4190 4191 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4192 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4193 4194 expressions = self._parse_wrapped_csv( 4195 self._parse_primary_key_part, optional=wrapped_optional 4196 ) 4197 options = self._parse_key_constraint_options() 4198 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4199 4200 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4201 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4202 return this 4203 4204 bracket_kind = self._prev.token_type 4205 4206 if self._match(TokenType.COLON): 4207 expressions: t.List[exp.Expression] = [ 4208 self.expression(exp.Slice, expression=self._parse_conjunction()) 4209 ] 4210 else: 4211 expressions = self._parse_csv( 4212 lambda: self._parse_slice( 4213 self._parse_alias(self._parse_conjunction(), explicit=True) 4214 ) 4215 ) 4216 4217 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4218 self.raise_error("Expected ]") 4219 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4220 self.raise_error("Expected }") 4221 4222 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4223 if bracket_kind == TokenType.L_BRACE: 4224 this = self.expression(exp.Struct, expressions=expressions) 4225 elif not this or this.name.upper() == "ARRAY": 4226 this = self.expression(exp.Array, expressions=expressions) 4227 else: 4228 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4229 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4230 4231 self._add_comments(this) 4232 return self._parse_bracket(this) 4233 4234 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4235 if self._match(TokenType.COLON): 4236 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4237 return this 4238 4239 def _parse_case(self) -> t.Optional[exp.Expression]: 4240 ifs = [] 4241 default = None 4242 4243 comments = self._prev_comments 4244 expression = self._parse_conjunction() 4245 4246 while self._match(TokenType.WHEN): 4247 this = self._parse_conjunction() 4248 self._match(TokenType.THEN) 4249 then = self._parse_conjunction() 4250 ifs.append(self.expression(exp.If, this=this, true=then)) 4251 4252 if self._match(TokenType.ELSE): 4253 default = self._parse_conjunction() 4254 4255 if not self._match(TokenType.END): 4256 self.raise_error("Expected END after CASE", self._prev) 4257 4258 return self._parse_window( 4259 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4260 ) 4261 4262 def _parse_if(self) -> t.Optional[exp.Expression]: 4263 if self._match(TokenType.L_PAREN): 4264 args = self._parse_csv(self._parse_conjunction) 4265 this = self.validate_expression(exp.If.from_arg_list(args), args) 4266 self._match_r_paren() 4267 else: 4268 index = self._index - 1 4269 condition = self._parse_conjunction() 4270 4271 if not condition: 4272 self._retreat(index) 4273 return None 4274 4275 self._match(TokenType.THEN) 4276 true = self._parse_conjunction() 4277 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4278 self._match(TokenType.END) 4279 this = self.expression(exp.If, this=condition, true=true, false=false) 4280 4281 return self._parse_window(this) 4282 4283 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4284 if not self._match_text_seq("VALUE", "FOR"): 4285 self._retreat(self._index - 1) 4286 return None 4287 4288 return self.expression( 4289 exp.NextValueFor, 4290 this=self._parse_column(), 4291 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4292 ) 4293 4294 def _parse_extract(self) -> exp.Extract: 4295 this = self._parse_function() or self._parse_var() or self._parse_type() 4296 4297 if self._match(TokenType.FROM): 4298 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4299 4300 if not self._match(TokenType.COMMA): 4301 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4302 4303 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4304 4305 def _parse_any_value(self) -> exp.AnyValue: 4306 this = self._parse_lambda() 4307 is_max = None 4308 having = None 4309 4310 if self._match(TokenType.HAVING): 4311 self._match_texts(("MAX", "MIN")) 4312 is_max = self._prev.text == "MAX" 4313 having = self._parse_column() 4314 4315 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4316 4317 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4318 this = self._parse_conjunction() 4319 4320 if not self._match(TokenType.ALIAS): 4321 if self._match(TokenType.COMMA): 4322 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4323 4324 self.raise_error("Expected AS after CAST") 4325 4326 fmt = None 4327 to = self._parse_types() 4328 4329 if self._match(TokenType.FORMAT): 4330 fmt_string = self._parse_string() 4331 fmt = self._parse_at_time_zone(fmt_string) 4332 4333 if not to: 4334 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4335 if to.this in exp.DataType.TEMPORAL_TYPES: 4336 this = self.expression( 4337 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4338 this=this, 4339 format=exp.Literal.string( 4340 format_time( 4341 fmt_string.this if fmt_string else "", 4342 self.FORMAT_MAPPING or self.TIME_MAPPING, 4343 self.FORMAT_TRIE or self.TIME_TRIE, 4344 ) 4345 ), 4346 ) 4347 4348 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4349 this.set("zone", fmt.args["zone"]) 4350 return this 4351 elif not to: 4352 self.raise_error("Expected TYPE after CAST") 4353 elif isinstance(to, exp.Identifier): 4354 to = exp.DataType.build(to.name, udt=True) 4355 elif to.this == exp.DataType.Type.CHAR: 4356 if self._match(TokenType.CHARACTER_SET): 4357 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4358 4359 return self.expression( 4360 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4361 ) 4362 4363 def _parse_concat(self) -> t.Optional[exp.Expression]: 4364 args = self._parse_csv(self._parse_conjunction) 4365 if self.CONCAT_NULL_OUTPUTS_STRING: 4366 args = self._ensure_string_if_null(args) 4367 4368 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4369 # we find such a call we replace it with its argument. 4370 if len(args) == 1: 4371 return args[0] 4372 4373 return self.expression(exp.Concat, expressions=args, safe=not self.STRICT_STRING_CONCAT) 4374 4375 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4376 args = self._parse_csv(self._parse_conjunction) 4377 if len(args) < 2: 4378 return self.expression(exp.ConcatWs, expressions=args) 4379 delim, *values = args 4380 if self.CONCAT_NULL_OUTPUTS_STRING: 4381 values = self._ensure_string_if_null(values) 4382 4383 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4384 4385 def _parse_string_agg(self) -> exp.Expression: 4386 if self._match(TokenType.DISTINCT): 4387 args: t.List[t.Optional[exp.Expression]] = [ 4388 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4389 ] 4390 if self._match(TokenType.COMMA): 4391 args.extend(self._parse_csv(self._parse_conjunction)) 4392 else: 4393 args = self._parse_csv(self._parse_conjunction) # type: ignore 4394 4395 index = self._index 4396 if not self._match(TokenType.R_PAREN) and args: 4397 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4398 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4399 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4400 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4401 4402 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4403 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4404 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4405 if not self._match_text_seq("WITHIN", "GROUP"): 4406 self._retreat(index) 4407 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4408 4409 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4410 order = self._parse_order(this=seq_get(args, 0)) 4411 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4412 4413 def _parse_convert( 4414 self, strict: bool, safe: t.Optional[bool] = None 4415 ) -> t.Optional[exp.Expression]: 4416 this = self._parse_bitwise() 4417 4418 if self._match(TokenType.USING): 4419 to: t.Optional[exp.Expression] = self.expression( 4420 exp.CharacterSet, this=self._parse_var() 4421 ) 4422 elif self._match(TokenType.COMMA): 4423 to = self._parse_types() 4424 else: 4425 to = None 4426 4427 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4428 4429 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4430 """ 4431 There are generally two variants of the DECODE function: 4432 4433 - DECODE(bin, charset) 4434 - DECODE(expression, search, result [, search, result] ... [, default]) 4435 4436 The second variant will always be parsed into a CASE expression. Note that NULL 4437 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4438 instead of relying on pattern matching. 4439 """ 4440 args = self._parse_csv(self._parse_conjunction) 4441 4442 if len(args) < 3: 4443 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4444 4445 expression, *expressions = args 4446 if not expression: 4447 return None 4448 4449 ifs = [] 4450 for search, result in zip(expressions[::2], expressions[1::2]): 4451 if not search or not result: 4452 return None 4453 4454 if isinstance(search, exp.Literal): 4455 ifs.append( 4456 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4457 ) 4458 elif isinstance(search, exp.Null): 4459 ifs.append( 4460 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4461 ) 4462 else: 4463 cond = exp.or_( 4464 exp.EQ(this=expression.copy(), expression=search), 4465 exp.and_( 4466 exp.Is(this=expression.copy(), expression=exp.Null()), 4467 exp.Is(this=search.copy(), expression=exp.Null()), 4468 copy=False, 4469 ), 4470 copy=False, 4471 ) 4472 ifs.append(exp.If(this=cond, true=result)) 4473 4474 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4475 4476 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4477 self._match_text_seq("KEY") 4478 key = self._parse_column() 4479 self._match_set((TokenType.COLON, TokenType.COMMA)) 4480 self._match_text_seq("VALUE") 4481 value = self._parse_bitwise() 4482 4483 if not key and not value: 4484 return None 4485 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4486 4487 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4488 if not this or not self._match_text_seq("FORMAT", "JSON"): 4489 return this 4490 4491 return self.expression(exp.FormatJson, this=this) 4492 4493 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4494 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4495 for value in values: 4496 if self._match_text_seq(value, "ON", on): 4497 return f"{value} ON {on}" 4498 4499 return None 4500 4501 def _parse_json_object(self) -> exp.JSONObject: 4502 star = self._parse_star() 4503 expressions = ( 4504 [star] 4505 if star 4506 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4507 ) 4508 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4509 4510 unique_keys = None 4511 if self._match_text_seq("WITH", "UNIQUE"): 4512 unique_keys = True 4513 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4514 unique_keys = False 4515 4516 self._match_text_seq("KEYS") 4517 4518 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4519 self._parse_type() 4520 ) 4521 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4522 4523 return self.expression( 4524 exp.JSONObject, 4525 expressions=expressions, 4526 null_handling=null_handling, 4527 unique_keys=unique_keys, 4528 return_type=return_type, 4529 encoding=encoding, 4530 ) 4531 4532 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4533 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4534 if not self._match_text_seq("NESTED"): 4535 this = self._parse_id_var() 4536 kind = self._parse_types(allow_identifiers=False) 4537 nested = None 4538 else: 4539 this = None 4540 kind = None 4541 nested = True 4542 4543 path = self._match_text_seq("PATH") and self._parse_string() 4544 nested_schema = nested and self._parse_json_schema() 4545 4546 return self.expression( 4547 exp.JSONColumnDef, 4548 this=this, 4549 kind=kind, 4550 path=path, 4551 nested_schema=nested_schema, 4552 ) 4553 4554 def _parse_json_schema(self) -> exp.JSONSchema: 4555 self._match_text_seq("COLUMNS") 4556 return self.expression( 4557 exp.JSONSchema, 4558 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4559 ) 4560 4561 def _parse_json_table(self) -> exp.JSONTable: 4562 this = self._parse_format_json(self._parse_bitwise()) 4563 path = self._match(TokenType.COMMA) and self._parse_string() 4564 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4565 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4566 schema = self._parse_json_schema() 4567 4568 return exp.JSONTable( 4569 this=this, 4570 schema=schema, 4571 path=path, 4572 error_handling=error_handling, 4573 empty_handling=empty_handling, 4574 ) 4575 4576 def _parse_logarithm(self) -> exp.Func: 4577 # Default argument order is base, expression 4578 args = self._parse_csv(self._parse_range) 4579 4580 if len(args) > 1: 4581 if not self.LOG_BASE_FIRST: 4582 args.reverse() 4583 return exp.Log.from_arg_list(args) 4584 4585 return self.expression( 4586 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4587 ) 4588 4589 def _parse_match_against(self) -> exp.MatchAgainst: 4590 expressions = self._parse_csv(self._parse_column) 4591 4592 self._match_text_seq(")", "AGAINST", "(") 4593 4594 this = self._parse_string() 4595 4596 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4597 modifier = "IN NATURAL LANGUAGE MODE" 4598 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4599 modifier = f"{modifier} WITH QUERY EXPANSION" 4600 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4601 modifier = "IN BOOLEAN MODE" 4602 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4603 modifier = "WITH QUERY EXPANSION" 4604 else: 4605 modifier = None 4606 4607 return self.expression( 4608 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4609 ) 4610 4611 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4612 def _parse_open_json(self) -> exp.OpenJSON: 4613 this = self._parse_bitwise() 4614 path = self._match(TokenType.COMMA) and self._parse_string() 4615 4616 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4617 this = self._parse_field(any_token=True) 4618 kind = self._parse_types() 4619 path = self._parse_string() 4620 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4621 4622 return self.expression( 4623 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4624 ) 4625 4626 expressions = None 4627 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4628 self._match_l_paren() 4629 expressions = self._parse_csv(_parse_open_json_column_def) 4630 4631 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4632 4633 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4634 args = self._parse_csv(self._parse_bitwise) 4635 4636 if self._match(TokenType.IN): 4637 return self.expression( 4638 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4639 ) 4640 4641 if haystack_first: 4642 haystack = seq_get(args, 0) 4643 needle = seq_get(args, 1) 4644 else: 4645 needle = seq_get(args, 0) 4646 haystack = seq_get(args, 1) 4647 4648 return self.expression( 4649 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4650 ) 4651 4652 def _parse_predict(self) -> exp.Predict: 4653 self._match_text_seq("MODEL") 4654 this = self._parse_table() 4655 4656 self._match(TokenType.COMMA) 4657 self._match_text_seq("TABLE") 4658 4659 return self.expression( 4660 exp.Predict, 4661 this=this, 4662 expression=self._parse_table(), 4663 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4664 ) 4665 4666 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4667 args = self._parse_csv(self._parse_table) 4668 return exp.JoinHint(this=func_name.upper(), expressions=args) 4669 4670 def _parse_substring(self) -> exp.Substring: 4671 # Postgres supports the form: substring(string [from int] [for int]) 4672 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4673 4674 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4675 4676 if self._match(TokenType.FROM): 4677 args.append(self._parse_bitwise()) 4678 if self._match(TokenType.FOR): 4679 args.append(self._parse_bitwise()) 4680 4681 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4682 4683 def _parse_trim(self) -> exp.Trim: 4684 # https://www.w3resource.com/sql/character-functions/trim.php 4685 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4686 4687 position = None 4688 collation = None 4689 expression = None 4690 4691 if self._match_texts(self.TRIM_TYPES): 4692 position = self._prev.text.upper() 4693 4694 this = self._parse_bitwise() 4695 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4696 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4697 expression = self._parse_bitwise() 4698 4699 if invert_order: 4700 this, expression = expression, this 4701 4702 if self._match(TokenType.COLLATE): 4703 collation = self._parse_bitwise() 4704 4705 return self.expression( 4706 exp.Trim, this=this, position=position, expression=expression, collation=collation 4707 ) 4708 4709 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4710 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4711 4712 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4713 return self._parse_window(self._parse_id_var(), alias=True) 4714 4715 def _parse_respect_or_ignore_nulls( 4716 self, this: t.Optional[exp.Expression] 4717 ) -> t.Optional[exp.Expression]: 4718 if self._match_text_seq("IGNORE", "NULLS"): 4719 return self.expression(exp.IgnoreNulls, this=this) 4720 if self._match_text_seq("RESPECT", "NULLS"): 4721 return self.expression(exp.RespectNulls, this=this) 4722 return this 4723 4724 def _parse_window( 4725 self, this: t.Optional[exp.Expression], alias: bool = False 4726 ) -> t.Optional[exp.Expression]: 4727 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4728 self._match(TokenType.WHERE) 4729 this = self.expression( 4730 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4731 ) 4732 self._match_r_paren() 4733 4734 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4735 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4736 if self._match_text_seq("WITHIN", "GROUP"): 4737 order = self._parse_wrapped(self._parse_order) 4738 this = self.expression(exp.WithinGroup, this=this, expression=order) 4739 4740 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4741 # Some dialects choose to implement and some do not. 4742 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4743 4744 # There is some code above in _parse_lambda that handles 4745 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4746 4747 # The below changes handle 4748 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4749 4750 # Oracle allows both formats 4751 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4752 # and Snowflake chose to do the same for familiarity 4753 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4754 this = self._parse_respect_or_ignore_nulls(this) 4755 4756 # bigquery select from window x AS (partition by ...) 4757 if alias: 4758 over = None 4759 self._match(TokenType.ALIAS) 4760 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4761 return this 4762 else: 4763 over = self._prev.text.upper() 4764 4765 if not self._match(TokenType.L_PAREN): 4766 return self.expression( 4767 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4768 ) 4769 4770 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4771 4772 first = self._match(TokenType.FIRST) 4773 if self._match_text_seq("LAST"): 4774 first = False 4775 4776 partition, order = self._parse_partition_and_order() 4777 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4778 4779 if kind: 4780 self._match(TokenType.BETWEEN) 4781 start = self._parse_window_spec() 4782 self._match(TokenType.AND) 4783 end = self._parse_window_spec() 4784 4785 spec = self.expression( 4786 exp.WindowSpec, 4787 kind=kind, 4788 start=start["value"], 4789 start_side=start["side"], 4790 end=end["value"], 4791 end_side=end["side"], 4792 ) 4793 else: 4794 spec = None 4795 4796 self._match_r_paren() 4797 4798 window = self.expression( 4799 exp.Window, 4800 this=this, 4801 partition_by=partition, 4802 order=order, 4803 spec=spec, 4804 alias=window_alias, 4805 over=over, 4806 first=first, 4807 ) 4808 4809 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4810 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4811 return self._parse_window(window, alias=alias) 4812 4813 return window 4814 4815 def _parse_partition_and_order( 4816 self, 4817 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4818 return self._parse_partition_by(), self._parse_order() 4819 4820 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4821 self._match(TokenType.BETWEEN) 4822 4823 return { 4824 "value": ( 4825 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4826 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4827 or self._parse_bitwise() 4828 ), 4829 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4830 } 4831 4832 def _parse_alias( 4833 self, this: t.Optional[exp.Expression], explicit: bool = False 4834 ) -> t.Optional[exp.Expression]: 4835 any_token = self._match(TokenType.ALIAS) 4836 comments = self._prev_comments 4837 4838 if explicit and not any_token: 4839 return this 4840 4841 if self._match(TokenType.L_PAREN): 4842 aliases = self.expression( 4843 exp.Aliases, 4844 comments=comments, 4845 this=this, 4846 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4847 ) 4848 self._match_r_paren(aliases) 4849 return aliases 4850 4851 alias = self._parse_id_var(any_token) 4852 4853 if alias: 4854 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 4855 4856 return this 4857 4858 def _parse_id_var( 4859 self, 4860 any_token: bool = True, 4861 tokens: t.Optional[t.Collection[TokenType]] = None, 4862 ) -> t.Optional[exp.Expression]: 4863 identifier = self._parse_identifier() 4864 4865 if identifier: 4866 return identifier 4867 4868 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4869 quoted = self._prev.token_type == TokenType.STRING 4870 return exp.Identifier(this=self._prev.text, quoted=quoted) 4871 4872 return None 4873 4874 def _parse_string(self) -> t.Optional[exp.Expression]: 4875 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 4876 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 4877 return self._parse_placeholder() 4878 4879 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4880 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4881 4882 def _parse_number(self) -> t.Optional[exp.Expression]: 4883 if self._match(TokenType.NUMBER): 4884 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4885 return self._parse_placeholder() 4886 4887 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4888 if self._match(TokenType.IDENTIFIER): 4889 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4890 return self._parse_placeholder() 4891 4892 def _parse_var( 4893 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4894 ) -> t.Optional[exp.Expression]: 4895 if ( 4896 (any_token and self._advance_any()) 4897 or self._match(TokenType.VAR) 4898 or (self._match_set(tokens) if tokens else False) 4899 ): 4900 return self.expression(exp.Var, this=self._prev.text) 4901 return self._parse_placeholder() 4902 4903 def _advance_any(self) -> t.Optional[Token]: 4904 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4905 self._advance() 4906 return self._prev 4907 return None 4908 4909 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4910 return self._parse_var() or self._parse_string() 4911 4912 def _parse_null(self) -> t.Optional[exp.Expression]: 4913 if self._match_set(self.NULL_TOKENS): 4914 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4915 return self._parse_placeholder() 4916 4917 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4918 if self._match(TokenType.TRUE): 4919 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4920 if self._match(TokenType.FALSE): 4921 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4922 return self._parse_placeholder() 4923 4924 def _parse_star(self) -> t.Optional[exp.Expression]: 4925 if self._match(TokenType.STAR): 4926 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4927 return self._parse_placeholder() 4928 4929 def _parse_parameter(self) -> exp.Parameter: 4930 def _parse_parameter_part() -> t.Optional[exp.Expression]: 4931 return ( 4932 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 4933 ) 4934 4935 self._match(TokenType.L_BRACE) 4936 this = _parse_parameter_part() 4937 expression = self._match(TokenType.COLON) and _parse_parameter_part() 4938 self._match(TokenType.R_BRACE) 4939 4940 return self.expression(exp.Parameter, this=this, expression=expression) 4941 4942 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4943 if self._match_set(self.PLACEHOLDER_PARSERS): 4944 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4945 if placeholder: 4946 return placeholder 4947 self._advance(-1) 4948 return None 4949 4950 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4951 if not self._match(TokenType.EXCEPT): 4952 return None 4953 if self._match(TokenType.L_PAREN, advance=False): 4954 return self._parse_wrapped_csv(self._parse_column) 4955 4956 except_column = self._parse_column() 4957 return [except_column] if except_column else None 4958 4959 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4960 if not self._match(TokenType.REPLACE): 4961 return None 4962 if self._match(TokenType.L_PAREN, advance=False): 4963 return self._parse_wrapped_csv(self._parse_expression) 4964 4965 replace_expression = self._parse_expression() 4966 return [replace_expression] if replace_expression else None 4967 4968 def _parse_csv( 4969 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4970 ) -> t.List[exp.Expression]: 4971 parse_result = parse_method() 4972 items = [parse_result] if parse_result is not None else [] 4973 4974 while self._match(sep): 4975 self._add_comments(parse_result) 4976 parse_result = parse_method() 4977 if parse_result is not None: 4978 items.append(parse_result) 4979 4980 return items 4981 4982 def _parse_tokens( 4983 self, parse_method: t.Callable, expressions: t.Dict 4984 ) -> t.Optional[exp.Expression]: 4985 this = parse_method() 4986 4987 while self._match_set(expressions): 4988 this = self.expression( 4989 expressions[self._prev.token_type], 4990 this=this, 4991 comments=self._prev_comments, 4992 expression=parse_method(), 4993 ) 4994 4995 return this 4996 4997 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4998 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4999 5000 def _parse_wrapped_csv( 5001 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5002 ) -> t.List[exp.Expression]: 5003 return self._parse_wrapped( 5004 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5005 ) 5006 5007 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5008 wrapped = self._match(TokenType.L_PAREN) 5009 if not wrapped and not optional: 5010 self.raise_error("Expecting (") 5011 parse_result = parse_method() 5012 if wrapped: 5013 self._match_r_paren() 5014 return parse_result 5015 5016 def _parse_expressions(self) -> t.List[exp.Expression]: 5017 return self._parse_csv(self._parse_expression) 5018 5019 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5020 return self._parse_select() or self._parse_set_operations( 5021 self._parse_expression() if alias else self._parse_conjunction() 5022 ) 5023 5024 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5025 return self._parse_query_modifiers( 5026 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5027 ) 5028 5029 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5030 this = None 5031 if self._match_texts(self.TRANSACTION_KIND): 5032 this = self._prev.text 5033 5034 self._match_texts(("TRANSACTION", "WORK")) 5035 5036 modes = [] 5037 while True: 5038 mode = [] 5039 while self._match(TokenType.VAR): 5040 mode.append(self._prev.text) 5041 5042 if mode: 5043 modes.append(" ".join(mode)) 5044 if not self._match(TokenType.COMMA): 5045 break 5046 5047 return self.expression(exp.Transaction, this=this, modes=modes) 5048 5049 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5050 chain = None 5051 savepoint = None 5052 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5053 5054 self._match_texts(("TRANSACTION", "WORK")) 5055 5056 if self._match_text_seq("TO"): 5057 self._match_text_seq("SAVEPOINT") 5058 savepoint = self._parse_id_var() 5059 5060 if self._match(TokenType.AND): 5061 chain = not self._match_text_seq("NO") 5062 self._match_text_seq("CHAIN") 5063 5064 if is_rollback: 5065 return self.expression(exp.Rollback, savepoint=savepoint) 5066 5067 return self.expression(exp.Commit, chain=chain) 5068 5069 def _parse_refresh(self) -> exp.Refresh: 5070 self._match(TokenType.TABLE) 5071 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5072 5073 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5074 if not self._match_text_seq("ADD"): 5075 return None 5076 5077 self._match(TokenType.COLUMN) 5078 exists_column = self._parse_exists(not_=True) 5079 expression = self._parse_field_def() 5080 5081 if expression: 5082 expression.set("exists", exists_column) 5083 5084 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5085 if self._match_texts(("FIRST", "AFTER")): 5086 position = self._prev.text 5087 column_position = self.expression( 5088 exp.ColumnPosition, this=self._parse_column(), position=position 5089 ) 5090 expression.set("position", column_position) 5091 5092 return expression 5093 5094 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5095 drop = self._match(TokenType.DROP) and self._parse_drop() 5096 if drop and not isinstance(drop, exp.Command): 5097 drop.set("kind", drop.args.get("kind", "COLUMN")) 5098 return drop 5099 5100 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5101 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5102 return self.expression( 5103 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5104 ) 5105 5106 def _parse_add_constraint(self) -> exp.AddConstraint: 5107 this = None 5108 kind = self._prev.token_type 5109 5110 if kind == TokenType.CONSTRAINT: 5111 this = self._parse_id_var() 5112 5113 if self._match_text_seq("CHECK"): 5114 expression = self._parse_wrapped(self._parse_conjunction) 5115 enforced = self._match_text_seq("ENFORCED") 5116 5117 return self.expression( 5118 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5119 ) 5120 5121 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5122 expression = self._parse_foreign_key() 5123 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5124 expression = self._parse_primary_key() 5125 else: 5126 expression = None 5127 5128 return self.expression(exp.AddConstraint, this=this, expression=expression) 5129 5130 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5131 index = self._index - 1 5132 5133 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5134 return self._parse_csv(self._parse_add_constraint) 5135 5136 self._retreat(index) 5137 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 5138 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5139 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5140 5141 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5142 self._match(TokenType.COLUMN) 5143 column = self._parse_field(any_token=True) 5144 5145 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5146 return self.expression(exp.AlterColumn, this=column, drop=True) 5147 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5148 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5149 5150 self._match_text_seq("SET", "DATA") 5151 return self.expression( 5152 exp.AlterColumn, 5153 this=column, 5154 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5155 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5156 using=self._match(TokenType.USING) and self._parse_conjunction(), 5157 ) 5158 5159 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5160 index = self._index - 1 5161 5162 partition_exists = self._parse_exists() 5163 if self._match(TokenType.PARTITION, advance=False): 5164 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5165 5166 self._retreat(index) 5167 return self._parse_csv(self._parse_drop_column) 5168 5169 def _parse_alter_table_rename(self) -> exp.RenameTable: 5170 self._match_text_seq("TO") 5171 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5172 5173 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5174 start = self._prev 5175 5176 if not self._match(TokenType.TABLE): 5177 return self._parse_as_command(start) 5178 5179 exists = self._parse_exists() 5180 only = self._match_text_seq("ONLY") 5181 this = self._parse_table(schema=True) 5182 5183 if self._next: 5184 self._advance() 5185 5186 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5187 if parser: 5188 actions = ensure_list(parser(self)) 5189 5190 if not self._curr: 5191 return self.expression( 5192 exp.AlterTable, 5193 this=this, 5194 exists=exists, 5195 actions=actions, 5196 only=only, 5197 ) 5198 5199 return self._parse_as_command(start) 5200 5201 def _parse_merge(self) -> exp.Merge: 5202 self._match(TokenType.INTO) 5203 target = self._parse_table() 5204 5205 if target and self._match(TokenType.ALIAS, advance=False): 5206 target.set("alias", self._parse_table_alias()) 5207 5208 self._match(TokenType.USING) 5209 using = self._parse_table() 5210 5211 self._match(TokenType.ON) 5212 on = self._parse_conjunction() 5213 5214 return self.expression( 5215 exp.Merge, 5216 this=target, 5217 using=using, 5218 on=on, 5219 expressions=self._parse_when_matched(), 5220 ) 5221 5222 def _parse_when_matched(self) -> t.List[exp.When]: 5223 whens = [] 5224 5225 while self._match(TokenType.WHEN): 5226 matched = not self._match(TokenType.NOT) 5227 self._match_text_seq("MATCHED") 5228 source = ( 5229 False 5230 if self._match_text_seq("BY", "TARGET") 5231 else self._match_text_seq("BY", "SOURCE") 5232 ) 5233 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5234 5235 self._match(TokenType.THEN) 5236 5237 if self._match(TokenType.INSERT): 5238 _this = self._parse_star() 5239 if _this: 5240 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5241 else: 5242 then = self.expression( 5243 exp.Insert, 5244 this=self._parse_value(), 5245 expression=self._match(TokenType.VALUES) and self._parse_value(), 5246 ) 5247 elif self._match(TokenType.UPDATE): 5248 expressions = self._parse_star() 5249 if expressions: 5250 then = self.expression(exp.Update, expressions=expressions) 5251 else: 5252 then = self.expression( 5253 exp.Update, 5254 expressions=self._match(TokenType.SET) 5255 and self._parse_csv(self._parse_equality), 5256 ) 5257 elif self._match(TokenType.DELETE): 5258 then = self.expression(exp.Var, this=self._prev.text) 5259 else: 5260 then = None 5261 5262 whens.append( 5263 self.expression( 5264 exp.When, 5265 matched=matched, 5266 source=source, 5267 condition=condition, 5268 then=then, 5269 ) 5270 ) 5271 return whens 5272 5273 def _parse_show(self) -> t.Optional[exp.Expression]: 5274 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5275 if parser: 5276 return parser(self) 5277 return self._parse_as_command(self._prev) 5278 5279 def _parse_set_item_assignment( 5280 self, kind: t.Optional[str] = None 5281 ) -> t.Optional[exp.Expression]: 5282 index = self._index 5283 5284 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5285 return self._parse_set_transaction(global_=kind == "GLOBAL") 5286 5287 left = self._parse_primary() or self._parse_id_var() 5288 assignment_delimiter = self._match_texts(("=", "TO")) 5289 5290 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5291 self._retreat(index) 5292 return None 5293 5294 right = self._parse_statement() or self._parse_id_var() 5295 this = self.expression(exp.EQ, this=left, expression=right) 5296 5297 return self.expression(exp.SetItem, this=this, kind=kind) 5298 5299 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5300 self._match_text_seq("TRANSACTION") 5301 characteristics = self._parse_csv( 5302 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5303 ) 5304 return self.expression( 5305 exp.SetItem, 5306 expressions=characteristics, 5307 kind="TRANSACTION", 5308 **{"global": global_}, # type: ignore 5309 ) 5310 5311 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5312 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5313 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5314 5315 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5316 index = self._index 5317 set_ = self.expression( 5318 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5319 ) 5320 5321 if self._curr: 5322 self._retreat(index) 5323 return self._parse_as_command(self._prev) 5324 5325 return set_ 5326 5327 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5328 for option in options: 5329 if self._match_text_seq(*option.split(" ")): 5330 return exp.var(option) 5331 return None 5332 5333 def _parse_as_command(self, start: Token) -> exp.Command: 5334 while self._curr: 5335 self._advance() 5336 text = self._find_sql(start, self._prev) 5337 size = len(start.text) 5338 return exp.Command(this=text[:size], expression=text[size:]) 5339 5340 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5341 settings = [] 5342 5343 self._match_l_paren() 5344 kind = self._parse_id_var() 5345 5346 if self._match(TokenType.L_PAREN): 5347 while True: 5348 key = self._parse_id_var() 5349 value = self._parse_primary() 5350 5351 if not key and value is None: 5352 break 5353 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5354 self._match(TokenType.R_PAREN) 5355 5356 self._match_r_paren() 5357 5358 return self.expression( 5359 exp.DictProperty, 5360 this=this, 5361 kind=kind.this if kind else None, 5362 settings=settings, 5363 ) 5364 5365 def _parse_dict_range(self, this: str) -> exp.DictRange: 5366 self._match_l_paren() 5367 has_min = self._match_text_seq("MIN") 5368 if has_min: 5369 min = self._parse_var() or self._parse_primary() 5370 self._match_text_seq("MAX") 5371 max = self._parse_var() or self._parse_primary() 5372 else: 5373 max = self._parse_var() or self._parse_primary() 5374 min = exp.Literal.number(0) 5375 self._match_r_paren() 5376 return self.expression(exp.DictRange, this=this, min=min, max=max) 5377 5378 def _parse_comprehension( 5379 self, this: t.Optional[exp.Expression] 5380 ) -> t.Optional[exp.Comprehension]: 5381 index = self._index 5382 expression = self._parse_column() 5383 if not self._match(TokenType.IN): 5384 self._retreat(index - 1) 5385 return None 5386 iterator = self._parse_column() 5387 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5388 return self.expression( 5389 exp.Comprehension, 5390 this=this, 5391 expression=expression, 5392 iterator=iterator, 5393 condition=condition, 5394 ) 5395 5396 def _find_parser( 5397 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5398 ) -> t.Optional[t.Callable]: 5399 if not self._curr: 5400 return None 5401 5402 index = self._index 5403 this = [] 5404 while True: 5405 # The current token might be multiple words 5406 curr = self._curr.text.upper() 5407 key = curr.split(" ") 5408 this.append(curr) 5409 5410 self._advance() 5411 result, trie = in_trie(trie, key) 5412 if result == TrieResult.FAILED: 5413 break 5414 5415 if result == TrieResult.EXISTS: 5416 subparser = parsers[" ".join(this)] 5417 return subparser 5418 5419 self._retreat(index) 5420 return None 5421 5422 def _match(self, token_type, advance=True, expression=None): 5423 if not self._curr: 5424 return None 5425 5426 if self._curr.token_type == token_type: 5427 if advance: 5428 self._advance() 5429 self._add_comments(expression) 5430 return True 5431 5432 return None 5433 5434 def _match_set(self, types, advance=True): 5435 if not self._curr: 5436 return None 5437 5438 if self._curr.token_type in types: 5439 if advance: 5440 self._advance() 5441 return True 5442 5443 return None 5444 5445 def _match_pair(self, token_type_a, token_type_b, advance=True): 5446 if not self._curr or not self._next: 5447 return None 5448 5449 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5450 if advance: 5451 self._advance(2) 5452 return True 5453 5454 return None 5455 5456 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5457 if not self._match(TokenType.L_PAREN, expression=expression): 5458 self.raise_error("Expecting (") 5459 5460 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5461 if not self._match(TokenType.R_PAREN, expression=expression): 5462 self.raise_error("Expecting )") 5463 5464 def _match_texts(self, texts, advance=True): 5465 if self._curr and self._curr.text.upper() in texts: 5466 if advance: 5467 self._advance() 5468 return True 5469 return False 5470 5471 def _match_text_seq(self, *texts, advance=True): 5472 index = self._index 5473 for text in texts: 5474 if self._curr and self._curr.text.upper() == text: 5475 self._advance() 5476 else: 5477 self._retreat(index) 5478 return False 5479 5480 if not advance: 5481 self._retreat(index) 5482 5483 return True 5484 5485 @t.overload 5486 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5487 ... 5488 5489 @t.overload 5490 def _replace_columns_with_dots( 5491 self, this: t.Optional[exp.Expression] 5492 ) -> t.Optional[exp.Expression]: 5493 ... 5494 5495 def _replace_columns_with_dots(self, this): 5496 if isinstance(this, exp.Dot): 5497 exp.replace_children(this, self._replace_columns_with_dots) 5498 elif isinstance(this, exp.Column): 5499 exp.replace_children(this, self._replace_columns_with_dots) 5500 table = this.args.get("table") 5501 this = ( 5502 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5503 ) 5504 5505 return this 5506 5507 def _replace_lambda( 5508 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5509 ) -> t.Optional[exp.Expression]: 5510 if not node: 5511 return node 5512 5513 for column in node.find_all(exp.Column): 5514 if column.parts[0].name in lambda_variables: 5515 dot_or_id = column.to_dot() if column.table else column.this 5516 parent = column.parent 5517 5518 while isinstance(parent, exp.Dot): 5519 if not isinstance(parent.parent, exp.Dot): 5520 parent.replace(dot_or_id) 5521 break 5522 parent = parent.parent 5523 else: 5524 if column is node: 5525 node = dot_or_id 5526 else: 5527 column.replace(dot_or_id) 5528 return node 5529 5530 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5531 return [ 5532 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5533 for value in values 5534 if value 5535 ]
21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMP_S, 165 TokenType.TIMESTAMP_MS, 166 TokenType.TIMESTAMP_NS, 167 TokenType.TIMESTAMPTZ, 168 TokenType.TIMESTAMPLTZ, 169 TokenType.DATETIME, 170 TokenType.DATETIME64, 171 TokenType.DATE, 172 TokenType.INT4RANGE, 173 TokenType.INT4MULTIRANGE, 174 TokenType.INT8RANGE, 175 TokenType.INT8MULTIRANGE, 176 TokenType.NUMRANGE, 177 TokenType.NUMMULTIRANGE, 178 TokenType.TSRANGE, 179 TokenType.TSMULTIRANGE, 180 TokenType.TSTZRANGE, 181 TokenType.TSTZMULTIRANGE, 182 TokenType.DATERANGE, 183 TokenType.DATEMULTIRANGE, 184 TokenType.DECIMAL, 185 TokenType.UDECIMAL, 186 TokenType.BIGDECIMAL, 187 TokenType.UUID, 188 TokenType.GEOGRAPHY, 189 TokenType.GEOMETRY, 190 TokenType.HLLSKETCH, 191 TokenType.HSTORE, 192 TokenType.PSEUDO_TYPE, 193 TokenType.SUPER, 194 TokenType.SERIAL, 195 TokenType.SMALLSERIAL, 196 TokenType.BIGSERIAL, 197 TokenType.XML, 198 TokenType.YEAR, 199 TokenType.UNIQUEIDENTIFIER, 200 TokenType.USERDEFINED, 201 TokenType.MONEY, 202 TokenType.SMALLMONEY, 203 TokenType.ROWVERSION, 204 TokenType.IMAGE, 205 TokenType.VARIANT, 206 TokenType.OBJECT, 207 TokenType.OBJECT_IDENTIFIER, 208 TokenType.INET, 209 TokenType.IPADDRESS, 210 TokenType.IPPREFIX, 211 TokenType.UNKNOWN, 212 TokenType.NULL, 213 *ENUM_TYPE_TOKENS, 214 *NESTED_TYPE_TOKENS, 215 } 216 217 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 218 TokenType.BIGINT: TokenType.UBIGINT, 219 TokenType.INT: TokenType.UINT, 220 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 221 TokenType.SMALLINT: TokenType.USMALLINT, 222 TokenType.TINYINT: TokenType.UTINYINT, 223 TokenType.DECIMAL: TokenType.UDECIMAL, 224 } 225 226 SUBQUERY_PREDICATES = { 227 TokenType.ANY: exp.Any, 228 TokenType.ALL: exp.All, 229 TokenType.EXISTS: exp.Exists, 230 TokenType.SOME: exp.Any, 231 } 232 233 RESERVED_KEYWORDS = { 234 *Tokenizer.SINGLE_TOKENS.values(), 235 TokenType.SELECT, 236 } 237 238 DB_CREATABLES = { 239 TokenType.DATABASE, 240 TokenType.SCHEMA, 241 TokenType.TABLE, 242 TokenType.VIEW, 243 TokenType.MODEL, 244 TokenType.DICTIONARY, 245 } 246 247 CREATABLES = { 248 TokenType.COLUMN, 249 TokenType.CONSTRAINT, 250 TokenType.FUNCTION, 251 TokenType.INDEX, 252 TokenType.PROCEDURE, 253 TokenType.FOREIGN_KEY, 254 *DB_CREATABLES, 255 } 256 257 # Tokens that can represent identifiers 258 ID_VAR_TOKENS = { 259 TokenType.VAR, 260 TokenType.ANTI, 261 TokenType.APPLY, 262 TokenType.ASC, 263 TokenType.AUTO_INCREMENT, 264 TokenType.BEGIN, 265 TokenType.CACHE, 266 TokenType.CASE, 267 TokenType.COLLATE, 268 TokenType.COMMAND, 269 TokenType.COMMENT, 270 TokenType.COMMIT, 271 TokenType.CONSTRAINT, 272 TokenType.DEFAULT, 273 TokenType.DELETE, 274 TokenType.DESC, 275 TokenType.DESCRIBE, 276 TokenType.DICTIONARY, 277 TokenType.DIV, 278 TokenType.END, 279 TokenType.EXECUTE, 280 TokenType.ESCAPE, 281 TokenType.FALSE, 282 TokenType.FIRST, 283 TokenType.FILTER, 284 TokenType.FORMAT, 285 TokenType.FULL, 286 TokenType.IS, 287 TokenType.ISNULL, 288 TokenType.INTERVAL, 289 TokenType.KEEP, 290 TokenType.KILL, 291 TokenType.LEFT, 292 TokenType.LOAD, 293 TokenType.MERGE, 294 TokenType.NATURAL, 295 TokenType.NEXT, 296 TokenType.OFFSET, 297 TokenType.OPERATOR, 298 TokenType.ORDINALITY, 299 TokenType.OVERLAPS, 300 TokenType.OVERWRITE, 301 TokenType.PARTITION, 302 TokenType.PERCENT, 303 TokenType.PIVOT, 304 TokenType.PRAGMA, 305 TokenType.RANGE, 306 TokenType.RECURSIVE, 307 TokenType.REFERENCES, 308 TokenType.REFRESH, 309 TokenType.REPLACE, 310 TokenType.RIGHT, 311 TokenType.ROW, 312 TokenType.ROWS, 313 TokenType.SEMI, 314 TokenType.SET, 315 TokenType.SETTINGS, 316 TokenType.SHOW, 317 TokenType.TEMPORARY, 318 TokenType.TOP, 319 TokenType.TRUE, 320 TokenType.UNIQUE, 321 TokenType.UNPIVOT, 322 TokenType.UPDATE, 323 TokenType.USE, 324 TokenType.VOLATILE, 325 TokenType.WINDOW, 326 *CREATABLES, 327 *SUBQUERY_PREDICATES, 328 *TYPE_TOKENS, 329 *NO_PAREN_FUNCTIONS, 330 } 331 332 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 333 334 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 335 TokenType.ANTI, 336 TokenType.APPLY, 337 TokenType.ASOF, 338 TokenType.FULL, 339 TokenType.LEFT, 340 TokenType.LOCK, 341 TokenType.NATURAL, 342 TokenType.OFFSET, 343 TokenType.RIGHT, 344 TokenType.SEMI, 345 TokenType.WINDOW, 346 } 347 348 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 349 350 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 351 352 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 353 354 FUNC_TOKENS = { 355 TokenType.COLLATE, 356 TokenType.COMMAND, 357 TokenType.CURRENT_DATE, 358 TokenType.CURRENT_DATETIME, 359 TokenType.CURRENT_TIMESTAMP, 360 TokenType.CURRENT_TIME, 361 TokenType.CURRENT_USER, 362 TokenType.FILTER, 363 TokenType.FIRST, 364 TokenType.FORMAT, 365 TokenType.GLOB, 366 TokenType.IDENTIFIER, 367 TokenType.INDEX, 368 TokenType.ISNULL, 369 TokenType.ILIKE, 370 TokenType.INSERT, 371 TokenType.LIKE, 372 TokenType.MERGE, 373 TokenType.OFFSET, 374 TokenType.PRIMARY_KEY, 375 TokenType.RANGE, 376 TokenType.REPLACE, 377 TokenType.RLIKE, 378 TokenType.ROW, 379 TokenType.UNNEST, 380 TokenType.VAR, 381 TokenType.LEFT, 382 TokenType.RIGHT, 383 TokenType.DATE, 384 TokenType.DATETIME, 385 TokenType.TABLE, 386 TokenType.TIMESTAMP, 387 TokenType.TIMESTAMPTZ, 388 TokenType.WINDOW, 389 TokenType.XOR, 390 *TYPE_TOKENS, 391 *SUBQUERY_PREDICATES, 392 } 393 394 CONJUNCTION = { 395 TokenType.AND: exp.And, 396 TokenType.OR: exp.Or, 397 } 398 399 EQUALITY = { 400 TokenType.COLON_EQ: exp.PropertyEQ, 401 TokenType.EQ: exp.EQ, 402 TokenType.NEQ: exp.NEQ, 403 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 404 } 405 406 COMPARISON = { 407 TokenType.GT: exp.GT, 408 TokenType.GTE: exp.GTE, 409 TokenType.LT: exp.LT, 410 TokenType.LTE: exp.LTE, 411 } 412 413 BITWISE = { 414 TokenType.AMP: exp.BitwiseAnd, 415 TokenType.CARET: exp.BitwiseXor, 416 TokenType.PIPE: exp.BitwiseOr, 417 } 418 419 TERM = { 420 TokenType.DASH: exp.Sub, 421 TokenType.PLUS: exp.Add, 422 TokenType.MOD: exp.Mod, 423 TokenType.COLLATE: exp.Collate, 424 } 425 426 FACTOR = { 427 TokenType.DIV: exp.IntDiv, 428 TokenType.LR_ARROW: exp.Distance, 429 TokenType.SLASH: exp.Div, 430 TokenType.STAR: exp.Mul, 431 } 432 433 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 434 435 TIMES = { 436 TokenType.TIME, 437 TokenType.TIMETZ, 438 } 439 440 TIMESTAMPS = { 441 TokenType.TIMESTAMP, 442 TokenType.TIMESTAMPTZ, 443 TokenType.TIMESTAMPLTZ, 444 *TIMES, 445 } 446 447 SET_OPERATIONS = { 448 TokenType.UNION, 449 TokenType.INTERSECT, 450 TokenType.EXCEPT, 451 } 452 453 JOIN_METHODS = { 454 TokenType.NATURAL, 455 TokenType.ASOF, 456 } 457 458 JOIN_SIDES = { 459 TokenType.LEFT, 460 TokenType.RIGHT, 461 TokenType.FULL, 462 } 463 464 JOIN_KINDS = { 465 TokenType.INNER, 466 TokenType.OUTER, 467 TokenType.CROSS, 468 TokenType.SEMI, 469 TokenType.ANTI, 470 } 471 472 JOIN_HINTS: t.Set[str] = set() 473 474 LAMBDAS = { 475 TokenType.ARROW: lambda self, expressions: self.expression( 476 exp.Lambda, 477 this=self._replace_lambda( 478 self._parse_conjunction(), 479 {node.name for node in expressions}, 480 ), 481 expressions=expressions, 482 ), 483 TokenType.FARROW: lambda self, expressions: self.expression( 484 exp.Kwarg, 485 this=exp.var(expressions[0].name), 486 expression=self._parse_conjunction(), 487 ), 488 } 489 490 COLUMN_OPERATORS = { 491 TokenType.DOT: None, 492 TokenType.DCOLON: lambda self, this, to: self.expression( 493 exp.Cast if self.STRICT_CAST else exp.TryCast, 494 this=this, 495 to=to, 496 ), 497 TokenType.ARROW: lambda self, this, path: self.expression( 498 exp.JSONExtract, 499 this=this, 500 expression=path, 501 ), 502 TokenType.DARROW: lambda self, this, path: self.expression( 503 exp.JSONExtractScalar, 504 this=this, 505 expression=path, 506 ), 507 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 508 exp.JSONBExtract, 509 this=this, 510 expression=path, 511 ), 512 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 513 exp.JSONBExtractScalar, 514 this=this, 515 expression=path, 516 ), 517 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 518 exp.JSONBContains, 519 this=this, 520 expression=key, 521 ), 522 } 523 524 EXPRESSION_PARSERS = { 525 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 526 exp.Column: lambda self: self._parse_column(), 527 exp.Condition: lambda self: self._parse_conjunction(), 528 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 529 exp.Expression: lambda self: self._parse_statement(), 530 exp.From: lambda self: self._parse_from(), 531 exp.Group: lambda self: self._parse_group(), 532 exp.Having: lambda self: self._parse_having(), 533 exp.Identifier: lambda self: self._parse_id_var(), 534 exp.Join: lambda self: self._parse_join(), 535 exp.Lambda: lambda self: self._parse_lambda(), 536 exp.Lateral: lambda self: self._parse_lateral(), 537 exp.Limit: lambda self: self._parse_limit(), 538 exp.Offset: lambda self: self._parse_offset(), 539 exp.Order: lambda self: self._parse_order(), 540 exp.Ordered: lambda self: self._parse_ordered(), 541 exp.Properties: lambda self: self._parse_properties(), 542 exp.Qualify: lambda self: self._parse_qualify(), 543 exp.Returning: lambda self: self._parse_returning(), 544 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 545 exp.Table: lambda self: self._parse_table_parts(), 546 exp.TableAlias: lambda self: self._parse_table_alias(), 547 exp.Where: lambda self: self._parse_where(), 548 exp.Window: lambda self: self._parse_named_window(), 549 exp.With: lambda self: self._parse_with(), 550 "JOIN_TYPE": lambda self: self._parse_join_parts(), 551 } 552 553 STATEMENT_PARSERS = { 554 TokenType.ALTER: lambda self: self._parse_alter(), 555 TokenType.BEGIN: lambda self: self._parse_transaction(), 556 TokenType.CACHE: lambda self: self._parse_cache(), 557 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 558 TokenType.COMMENT: lambda self: self._parse_comment(), 559 TokenType.CREATE: lambda self: self._parse_create(), 560 TokenType.DELETE: lambda self: self._parse_delete(), 561 TokenType.DESC: lambda self: self._parse_describe(), 562 TokenType.DESCRIBE: lambda self: self._parse_describe(), 563 TokenType.DROP: lambda self: self._parse_drop(), 564 TokenType.INSERT: lambda self: self._parse_insert(), 565 TokenType.KILL: lambda self: self._parse_kill(), 566 TokenType.LOAD: lambda self: self._parse_load(), 567 TokenType.MERGE: lambda self: self._parse_merge(), 568 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 569 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 570 TokenType.REFRESH: lambda self: self._parse_refresh(), 571 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 572 TokenType.SET: lambda self: self._parse_set(), 573 TokenType.UNCACHE: lambda self: self._parse_uncache(), 574 TokenType.UPDATE: lambda self: self._parse_update(), 575 TokenType.USE: lambda self: self.expression( 576 exp.Use, 577 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 578 and exp.var(self._prev.text), 579 this=self._parse_table(schema=False), 580 ), 581 } 582 583 UNARY_PARSERS = { 584 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 585 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 586 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 587 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 588 } 589 590 PRIMARY_PARSERS = { 591 TokenType.STRING: lambda self, token: self.expression( 592 exp.Literal, this=token.text, is_string=True 593 ), 594 TokenType.NUMBER: lambda self, token: self.expression( 595 exp.Literal, this=token.text, is_string=False 596 ), 597 TokenType.STAR: lambda self, _: self.expression( 598 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 599 ), 600 TokenType.NULL: lambda self, _: self.expression(exp.Null), 601 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 602 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 603 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 604 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 605 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 606 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 607 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 608 exp.National, this=token.text 609 ), 610 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 611 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 612 exp.RawString, this=token.text 613 ), 614 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 615 } 616 617 PLACEHOLDER_PARSERS = { 618 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 619 TokenType.PARAMETER: lambda self: self._parse_parameter(), 620 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 621 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 622 else None, 623 } 624 625 RANGE_PARSERS = { 626 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 627 TokenType.GLOB: binary_range_parser(exp.Glob), 628 TokenType.ILIKE: binary_range_parser(exp.ILike), 629 TokenType.IN: lambda self, this: self._parse_in(this), 630 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 631 TokenType.IS: lambda self, this: self._parse_is(this), 632 TokenType.LIKE: binary_range_parser(exp.Like), 633 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 634 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 635 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 636 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 637 } 638 639 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 640 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 641 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 642 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 643 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 644 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 645 "CHECKSUM": lambda self: self._parse_checksum(), 646 "CLUSTER BY": lambda self: self._parse_cluster(), 647 "CLUSTERED": lambda self: self._parse_clustered_by(), 648 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 649 exp.CollateProperty, **kwargs 650 ), 651 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 652 "COPY": lambda self: self._parse_copy_property(), 653 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 654 "DEFINER": lambda self: self._parse_definer(), 655 "DETERMINISTIC": lambda self: self.expression( 656 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 657 ), 658 "DISTKEY": lambda self: self._parse_distkey(), 659 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 660 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 661 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 662 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 663 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 664 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 665 "FREESPACE": lambda self: self._parse_freespace(), 666 "HEAP": lambda self: self.expression(exp.HeapProperty), 667 "IMMUTABLE": lambda self: self.expression( 668 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 669 ), 670 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 671 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 672 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 673 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 674 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 675 "LIKE": lambda self: self._parse_create_like(), 676 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 677 "LOCK": lambda self: self._parse_locking(), 678 "LOCKING": lambda self: self._parse_locking(), 679 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 680 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 681 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 682 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 683 "NO": lambda self: self._parse_no_property(), 684 "ON": lambda self: self._parse_on_property(), 685 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 686 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 687 "PARTITION": lambda self: self._parse_partitioned_of(), 688 "PARTITION BY": lambda self: self._parse_partitioned_by(), 689 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 690 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 691 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 692 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 693 "REMOTE": lambda self: self._parse_remote_with_connection(), 694 "RETURNS": lambda self: self._parse_returns(), 695 "ROW": lambda self: self._parse_row(), 696 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 697 "SAMPLE": lambda self: self.expression( 698 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 699 ), 700 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 701 "SETTINGS": lambda self: self.expression( 702 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 703 ), 704 "SORTKEY": lambda self: self._parse_sortkey(), 705 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 706 "STABLE": lambda self: self.expression( 707 exp.StabilityProperty, this=exp.Literal.string("STABLE") 708 ), 709 "STORED": lambda self: self._parse_stored(), 710 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 711 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 712 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 713 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 714 "TO": lambda self: self._parse_to_table(), 715 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 716 "TRANSFORM": lambda self: self.expression( 717 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 718 ), 719 "TTL": lambda self: self._parse_ttl(), 720 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 721 "VOLATILE": lambda self: self._parse_volatile_property(), 722 "WITH": lambda self: self._parse_with_property(), 723 } 724 725 CONSTRAINT_PARSERS = { 726 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 727 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 728 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 729 "CHARACTER SET": lambda self: self.expression( 730 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 731 ), 732 "CHECK": lambda self: self.expression( 733 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 734 ), 735 "COLLATE": lambda self: self.expression( 736 exp.CollateColumnConstraint, this=self._parse_var() 737 ), 738 "COMMENT": lambda self: self.expression( 739 exp.CommentColumnConstraint, this=self._parse_string() 740 ), 741 "COMPRESS": lambda self: self._parse_compress(), 742 "CLUSTERED": lambda self: self.expression( 743 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 744 ), 745 "NONCLUSTERED": lambda self: self.expression( 746 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 747 ), 748 "DEFAULT": lambda self: self.expression( 749 exp.DefaultColumnConstraint, this=self._parse_bitwise() 750 ), 751 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 752 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 753 "FORMAT": lambda self: self.expression( 754 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 755 ), 756 "GENERATED": lambda self: self._parse_generated_as_identity(), 757 "IDENTITY": lambda self: self._parse_auto_increment(), 758 "INLINE": lambda self: self._parse_inline(), 759 "LIKE": lambda self: self._parse_create_like(), 760 "NOT": lambda self: self._parse_not_constraint(), 761 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 762 "ON": lambda self: ( 763 self._match(TokenType.UPDATE) 764 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 765 ) 766 or self.expression(exp.OnProperty, this=self._parse_id_var()), 767 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 768 "PERIOD": lambda self: self._parse_period_for_system_time(), 769 "PRIMARY KEY": lambda self: self._parse_primary_key(), 770 "REFERENCES": lambda self: self._parse_references(match=False), 771 "TITLE": lambda self: self.expression( 772 exp.TitleColumnConstraint, this=self._parse_var_or_string() 773 ), 774 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 775 "UNIQUE": lambda self: self._parse_unique(), 776 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 777 "WITH": lambda self: self.expression( 778 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 779 ), 780 } 781 782 ALTER_PARSERS = { 783 "ADD": lambda self: self._parse_alter_table_add(), 784 "ALTER": lambda self: self._parse_alter_table_alter(), 785 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 786 "DROP": lambda self: self._parse_alter_table_drop(), 787 "RENAME": lambda self: self._parse_alter_table_rename(), 788 } 789 790 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 791 792 NO_PAREN_FUNCTION_PARSERS = { 793 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 794 "CASE": lambda self: self._parse_case(), 795 "IF": lambda self: self._parse_if(), 796 "NEXT": lambda self: self._parse_next_value_for(), 797 } 798 799 INVALID_FUNC_NAME_TOKENS = { 800 TokenType.IDENTIFIER, 801 TokenType.STRING, 802 } 803 804 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 805 806 FUNCTION_PARSERS = { 807 "ANY_VALUE": lambda self: self._parse_any_value(), 808 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 809 "CONCAT": lambda self: self._parse_concat(), 810 "CONCAT_WS": lambda self: self._parse_concat_ws(), 811 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 812 "DECODE": lambda self: self._parse_decode(), 813 "EXTRACT": lambda self: self._parse_extract(), 814 "JSON_OBJECT": lambda self: self._parse_json_object(), 815 "JSON_TABLE": lambda self: self._parse_json_table(), 816 "LOG": lambda self: self._parse_logarithm(), 817 "MATCH": lambda self: self._parse_match_against(), 818 "OPENJSON": lambda self: self._parse_open_json(), 819 "POSITION": lambda self: self._parse_position(), 820 "PREDICT": lambda self: self._parse_predict(), 821 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 822 "STRING_AGG": lambda self: self._parse_string_agg(), 823 "SUBSTRING": lambda self: self._parse_substring(), 824 "TRIM": lambda self: self._parse_trim(), 825 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 826 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 827 } 828 829 QUERY_MODIFIER_PARSERS = { 830 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 831 TokenType.WHERE: lambda self: ("where", self._parse_where()), 832 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 833 TokenType.HAVING: lambda self: ("having", self._parse_having()), 834 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 835 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 836 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 837 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 838 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 839 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 840 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 841 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 842 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 843 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 844 TokenType.CLUSTER_BY: lambda self: ( 845 "cluster", 846 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 847 ), 848 TokenType.DISTRIBUTE_BY: lambda self: ( 849 "distribute", 850 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 851 ), 852 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 853 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 854 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 855 } 856 857 SET_PARSERS = { 858 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 859 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 860 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 861 "TRANSACTION": lambda self: self._parse_set_transaction(), 862 } 863 864 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 865 866 TYPE_LITERAL_PARSERS = { 867 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 868 } 869 870 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 871 872 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 873 874 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 875 876 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 877 TRANSACTION_CHARACTERISTICS = { 878 "ISOLATION LEVEL REPEATABLE READ", 879 "ISOLATION LEVEL READ COMMITTED", 880 "ISOLATION LEVEL READ UNCOMMITTED", 881 "ISOLATION LEVEL SERIALIZABLE", 882 "READ WRITE", 883 "READ ONLY", 884 } 885 886 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 887 888 CLONE_KEYWORDS = {"CLONE", "COPY"} 889 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 890 891 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 892 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 893 894 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 895 896 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 897 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 898 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 899 900 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 901 902 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 903 904 DISTINCT_TOKENS = {TokenType.DISTINCT} 905 906 NULL_TOKENS = {TokenType.NULL} 907 908 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 909 910 STRICT_CAST = True 911 912 # A NULL arg in CONCAT yields NULL by default 913 CONCAT_NULL_OUTPUTS_STRING = False 914 915 PREFIXED_PIVOT_COLUMNS = False 916 IDENTIFY_PIVOT_STRINGS = False 917 918 LOG_BASE_FIRST = True 919 LOG_DEFAULTS_TO_LN = False 920 921 # Whether or not ADD is present for each column added by ALTER TABLE 922 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 923 924 # Whether or not the table sample clause expects CSV syntax 925 TABLESAMPLE_CSV = False 926 927 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 928 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 929 930 # Whether the TRIM function expects the characters to trim as its first argument 931 TRIM_PATTERN_FIRST = False 932 933 # Whether the behavior of a / b depends on the types of a and b. 934 # False means a / b is always float division. 935 # True means a / b is integer division if both a and b are integers. 936 TYPED_DIVISION = False 937 938 # False means 1 / 0 throws an error. 939 # True means 1 / 0 returns null. 940 SAFE_DIVISION = False 941 942 __slots__ = ( 943 "error_level", 944 "error_message_context", 945 "max_errors", 946 "sql", 947 "errors", 948 "_tokens", 949 "_index", 950 "_curr", 951 "_next", 952 "_prev", 953 "_prev_comments", 954 "_tokenizer", 955 ) 956 957 # Autofilled 958 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 959 INDEX_OFFSET: int = 0 960 UNNEST_COLUMN_ONLY: bool = False 961 ALIAS_POST_TABLESAMPLE: bool = False 962 STRICT_STRING_CONCAT = False 963 DPIPE_IS_STRING_CONCAT = True 964 SUPPORTS_USER_DEFINED_TYPES = True 965 NORMALIZE_FUNCTIONS = "upper" 966 NULL_ORDERING: str = "nulls_are_small" 967 SHOW_TRIE: t.Dict = {} 968 SET_TRIE: t.Dict = {} 969 FORMAT_MAPPING: t.Dict[str, str] = {} 970 FORMAT_TRIE: t.Dict = {} 971 TIME_MAPPING: t.Dict[str, str] = {} 972 TIME_TRIE: t.Dict = {} 973 974 def __init__( 975 self, 976 error_level: t.Optional[ErrorLevel] = None, 977 error_message_context: int = 100, 978 max_errors: int = 3, 979 ): 980 self.error_level = error_level or ErrorLevel.IMMEDIATE 981 self.error_message_context = error_message_context 982 self.max_errors = max_errors 983 self._tokenizer = self.TOKENIZER_CLASS() 984 self.reset() 985 986 def reset(self): 987 self.sql = "" 988 self.errors = [] 989 self._tokens = [] 990 self._index = 0 991 self._curr = None 992 self._next = None 993 self._prev = None 994 self._prev_comments = None 995 996 def parse( 997 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 998 ) -> t.List[t.Optional[exp.Expression]]: 999 """ 1000 Parses a list of tokens and returns a list of syntax trees, one tree 1001 per parsed SQL statement. 1002 1003 Args: 1004 raw_tokens: The list of tokens. 1005 sql: The original SQL string, used to produce helpful debug messages. 1006 1007 Returns: 1008 The list of the produced syntax trees. 1009 """ 1010 return self._parse( 1011 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1012 ) 1013 1014 def parse_into( 1015 self, 1016 expression_types: exp.IntoType, 1017 raw_tokens: t.List[Token], 1018 sql: t.Optional[str] = None, 1019 ) -> t.List[t.Optional[exp.Expression]]: 1020 """ 1021 Parses a list of tokens into a given Expression type. If a collection of Expression 1022 types is given instead, this method will try to parse the token list into each one 1023 of them, stopping at the first for which the parsing succeeds. 1024 1025 Args: 1026 expression_types: The expression type(s) to try and parse the token list into. 1027 raw_tokens: The list of tokens. 1028 sql: The original SQL string, used to produce helpful debug messages. 1029 1030 Returns: 1031 The target Expression. 1032 """ 1033 errors = [] 1034 for expression_type in ensure_list(expression_types): 1035 parser = self.EXPRESSION_PARSERS.get(expression_type) 1036 if not parser: 1037 raise TypeError(f"No parser registered for {expression_type}") 1038 1039 try: 1040 return self._parse(parser, raw_tokens, sql) 1041 except ParseError as e: 1042 e.errors[0]["into_expression"] = expression_type 1043 errors.append(e) 1044 1045 raise ParseError( 1046 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1047 errors=merge_errors(errors), 1048 ) from errors[-1] 1049 1050 def _parse( 1051 self, 1052 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1053 raw_tokens: t.List[Token], 1054 sql: t.Optional[str] = None, 1055 ) -> t.List[t.Optional[exp.Expression]]: 1056 self.reset() 1057 self.sql = sql or "" 1058 1059 total = len(raw_tokens) 1060 chunks: t.List[t.List[Token]] = [[]] 1061 1062 for i, token in enumerate(raw_tokens): 1063 if token.token_type == TokenType.SEMICOLON: 1064 if i < total - 1: 1065 chunks.append([]) 1066 else: 1067 chunks[-1].append(token) 1068 1069 expressions = [] 1070 1071 for tokens in chunks: 1072 self._index = -1 1073 self._tokens = tokens 1074 self._advance() 1075 1076 expressions.append(parse_method(self)) 1077 1078 if self._index < len(self._tokens): 1079 self.raise_error("Invalid expression / Unexpected token") 1080 1081 self.check_errors() 1082 1083 return expressions 1084 1085 def check_errors(self) -> None: 1086 """Logs or raises any found errors, depending on the chosen error level setting.""" 1087 if self.error_level == ErrorLevel.WARN: 1088 for error in self.errors: 1089 logger.error(str(error)) 1090 elif self.error_level == ErrorLevel.RAISE and self.errors: 1091 raise ParseError( 1092 concat_messages(self.errors, self.max_errors), 1093 errors=merge_errors(self.errors), 1094 ) 1095 1096 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1097 """ 1098 Appends an error in the list of recorded errors or raises it, depending on the chosen 1099 error level setting. 1100 """ 1101 token = token or self._curr or self._prev or Token.string("") 1102 start = token.start 1103 end = token.end + 1 1104 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1105 highlight = self.sql[start:end] 1106 end_context = self.sql[end : end + self.error_message_context] 1107 1108 error = ParseError.new( 1109 f"{message}. Line {token.line}, Col: {token.col}.\n" 1110 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1111 description=message, 1112 line=token.line, 1113 col=token.col, 1114 start_context=start_context, 1115 highlight=highlight, 1116 end_context=end_context, 1117 ) 1118 1119 if self.error_level == ErrorLevel.IMMEDIATE: 1120 raise error 1121 1122 self.errors.append(error) 1123 1124 def expression( 1125 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1126 ) -> E: 1127 """ 1128 Creates a new, validated Expression. 1129 1130 Args: 1131 exp_class: The expression class to instantiate. 1132 comments: An optional list of comments to attach to the expression. 1133 kwargs: The arguments to set for the expression along with their respective values. 1134 1135 Returns: 1136 The target expression. 1137 """ 1138 instance = exp_class(**kwargs) 1139 instance.add_comments(comments) if comments else self._add_comments(instance) 1140 return self.validate_expression(instance) 1141 1142 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1143 if expression and self._prev_comments: 1144 expression.add_comments(self._prev_comments) 1145 self._prev_comments = None 1146 1147 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1148 """ 1149 Validates an Expression, making sure that all its mandatory arguments are set. 1150 1151 Args: 1152 expression: The expression to validate. 1153 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1154 1155 Returns: 1156 The validated expression. 1157 """ 1158 if self.error_level != ErrorLevel.IGNORE: 1159 for error_message in expression.error_messages(args): 1160 self.raise_error(error_message) 1161 1162 return expression 1163 1164 def _find_sql(self, start: Token, end: Token) -> str: 1165 return self.sql[start.start : end.end + 1] 1166 1167 def _advance(self, times: int = 1) -> None: 1168 self._index += times 1169 self._curr = seq_get(self._tokens, self._index) 1170 self._next = seq_get(self._tokens, self._index + 1) 1171 1172 if self._index > 0: 1173 self._prev = self._tokens[self._index - 1] 1174 self._prev_comments = self._prev.comments 1175 else: 1176 self._prev = None 1177 self._prev_comments = None 1178 1179 def _retreat(self, index: int) -> None: 1180 if index != self._index: 1181 self._advance(index - self._index) 1182 1183 def _parse_command(self) -> exp.Command: 1184 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1185 1186 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1187 start = self._prev 1188 exists = self._parse_exists() if allow_exists else None 1189 1190 self._match(TokenType.ON) 1191 1192 kind = self._match_set(self.CREATABLES) and self._prev 1193 if not kind: 1194 return self._parse_as_command(start) 1195 1196 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1197 this = self._parse_user_defined_function(kind=kind.token_type) 1198 elif kind.token_type == TokenType.TABLE: 1199 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1200 elif kind.token_type == TokenType.COLUMN: 1201 this = self._parse_column() 1202 else: 1203 this = self._parse_id_var() 1204 1205 self._match(TokenType.IS) 1206 1207 return self.expression( 1208 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1209 ) 1210 1211 def _parse_to_table( 1212 self, 1213 ) -> exp.ToTableProperty: 1214 table = self._parse_table_parts(schema=True) 1215 return self.expression(exp.ToTableProperty, this=table) 1216 1217 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1218 def _parse_ttl(self) -> exp.Expression: 1219 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1220 this = self._parse_bitwise() 1221 1222 if self._match_text_seq("DELETE"): 1223 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1224 if self._match_text_seq("RECOMPRESS"): 1225 return self.expression( 1226 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1227 ) 1228 if self._match_text_seq("TO", "DISK"): 1229 return self.expression( 1230 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1231 ) 1232 if self._match_text_seq("TO", "VOLUME"): 1233 return self.expression( 1234 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1235 ) 1236 1237 return this 1238 1239 expressions = self._parse_csv(_parse_ttl_action) 1240 where = self._parse_where() 1241 group = self._parse_group() 1242 1243 aggregates = None 1244 if group and self._match(TokenType.SET): 1245 aggregates = self._parse_csv(self._parse_set_item) 1246 1247 return self.expression( 1248 exp.MergeTreeTTL, 1249 expressions=expressions, 1250 where=where, 1251 group=group, 1252 aggregates=aggregates, 1253 ) 1254 1255 def _parse_statement(self) -> t.Optional[exp.Expression]: 1256 if self._curr is None: 1257 return None 1258 1259 if self._match_set(self.STATEMENT_PARSERS): 1260 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1261 1262 if self._match_set(Tokenizer.COMMANDS): 1263 return self._parse_command() 1264 1265 expression = self._parse_expression() 1266 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1267 return self._parse_query_modifiers(expression) 1268 1269 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1270 start = self._prev 1271 temporary = self._match(TokenType.TEMPORARY) 1272 materialized = self._match_text_seq("MATERIALIZED") 1273 1274 kind = self._match_set(self.CREATABLES) and self._prev.text 1275 if not kind: 1276 return self._parse_as_command(start) 1277 1278 return self.expression( 1279 exp.Drop, 1280 comments=start.comments, 1281 exists=exists or self._parse_exists(), 1282 this=self._parse_table(schema=True), 1283 kind=kind, 1284 temporary=temporary, 1285 materialized=materialized, 1286 cascade=self._match_text_seq("CASCADE"), 1287 constraints=self._match_text_seq("CONSTRAINTS"), 1288 purge=self._match_text_seq("PURGE"), 1289 ) 1290 1291 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1292 return ( 1293 self._match_text_seq("IF") 1294 and (not not_ or self._match(TokenType.NOT)) 1295 and self._match(TokenType.EXISTS) 1296 ) 1297 1298 def _parse_create(self) -> exp.Create | exp.Command: 1299 # Note: this can't be None because we've matched a statement parser 1300 start = self._prev 1301 comments = self._prev_comments 1302 1303 replace = start.text.upper() == "REPLACE" or self._match_pair( 1304 TokenType.OR, TokenType.REPLACE 1305 ) 1306 unique = self._match(TokenType.UNIQUE) 1307 1308 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1309 self._advance() 1310 1311 properties = None 1312 create_token = self._match_set(self.CREATABLES) and self._prev 1313 1314 if not create_token: 1315 # exp.Properties.Location.POST_CREATE 1316 properties = self._parse_properties() 1317 create_token = self._match_set(self.CREATABLES) and self._prev 1318 1319 if not properties or not create_token: 1320 return self._parse_as_command(start) 1321 1322 exists = self._parse_exists(not_=True) 1323 this = None 1324 expression: t.Optional[exp.Expression] = None 1325 indexes = None 1326 no_schema_binding = None 1327 begin = None 1328 end = None 1329 clone = None 1330 1331 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1332 nonlocal properties 1333 if properties and temp_props: 1334 properties.expressions.extend(temp_props.expressions) 1335 elif temp_props: 1336 properties = temp_props 1337 1338 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1339 this = self._parse_user_defined_function(kind=create_token.token_type) 1340 1341 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1342 extend_props(self._parse_properties()) 1343 1344 self._match(TokenType.ALIAS) 1345 1346 if self._match(TokenType.COMMAND): 1347 expression = self._parse_as_command(self._prev) 1348 else: 1349 begin = self._match(TokenType.BEGIN) 1350 return_ = self._match_text_seq("RETURN") 1351 1352 if self._match(TokenType.STRING, advance=False): 1353 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1354 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1355 expression = self._parse_string() 1356 extend_props(self._parse_properties()) 1357 else: 1358 expression = self._parse_statement() 1359 1360 end = self._match_text_seq("END") 1361 1362 if return_: 1363 expression = self.expression(exp.Return, this=expression) 1364 elif create_token.token_type == TokenType.INDEX: 1365 this = self._parse_index(index=self._parse_id_var()) 1366 elif create_token.token_type in self.DB_CREATABLES: 1367 table_parts = self._parse_table_parts(schema=True) 1368 1369 # exp.Properties.Location.POST_NAME 1370 self._match(TokenType.COMMA) 1371 extend_props(self._parse_properties(before=True)) 1372 1373 this = self._parse_schema(this=table_parts) 1374 1375 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1376 extend_props(self._parse_properties()) 1377 1378 self._match(TokenType.ALIAS) 1379 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1380 # exp.Properties.Location.POST_ALIAS 1381 extend_props(self._parse_properties()) 1382 1383 expression = self._parse_ddl_select() 1384 1385 if create_token.token_type == TokenType.TABLE: 1386 # exp.Properties.Location.POST_EXPRESSION 1387 extend_props(self._parse_properties()) 1388 1389 indexes = [] 1390 while True: 1391 index = self._parse_index() 1392 1393 # exp.Properties.Location.POST_INDEX 1394 extend_props(self._parse_properties()) 1395 1396 if not index: 1397 break 1398 else: 1399 self._match(TokenType.COMMA) 1400 indexes.append(index) 1401 elif create_token.token_type == TokenType.VIEW: 1402 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1403 no_schema_binding = True 1404 1405 shallow = self._match_text_seq("SHALLOW") 1406 1407 if self._match_texts(self.CLONE_KEYWORDS): 1408 copy = self._prev.text.lower() == "copy" 1409 clone = self._parse_table(schema=True) 1410 when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper() 1411 clone_kind = ( 1412 self._match(TokenType.L_PAREN) 1413 and self._match_texts(self.CLONE_KINDS) 1414 and self._prev.text.upper() 1415 ) 1416 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1417 self._match(TokenType.R_PAREN) 1418 clone = self.expression( 1419 exp.Clone, 1420 this=clone, 1421 when=when, 1422 kind=clone_kind, 1423 shallow=shallow, 1424 expression=clone_expression, 1425 copy=copy, 1426 ) 1427 1428 return self.expression( 1429 exp.Create, 1430 comments=comments, 1431 this=this, 1432 kind=create_token.text, 1433 replace=replace, 1434 unique=unique, 1435 expression=expression, 1436 exists=exists, 1437 properties=properties, 1438 indexes=indexes, 1439 no_schema_binding=no_schema_binding, 1440 begin=begin, 1441 end=end, 1442 clone=clone, 1443 ) 1444 1445 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1446 # only used for teradata currently 1447 self._match(TokenType.COMMA) 1448 1449 kwargs = { 1450 "no": self._match_text_seq("NO"), 1451 "dual": self._match_text_seq("DUAL"), 1452 "before": self._match_text_seq("BEFORE"), 1453 "default": self._match_text_seq("DEFAULT"), 1454 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1455 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1456 "after": self._match_text_seq("AFTER"), 1457 "minimum": self._match_texts(("MIN", "MINIMUM")), 1458 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1459 } 1460 1461 if self._match_texts(self.PROPERTY_PARSERS): 1462 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1463 try: 1464 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1465 except TypeError: 1466 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1467 1468 return None 1469 1470 def _parse_property(self) -> t.Optional[exp.Expression]: 1471 if self._match_texts(self.PROPERTY_PARSERS): 1472 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1473 1474 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1475 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1476 1477 if self._match_text_seq("COMPOUND", "SORTKEY"): 1478 return self._parse_sortkey(compound=True) 1479 1480 if self._match_text_seq("SQL", "SECURITY"): 1481 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1482 1483 index = self._index 1484 key = self._parse_column() 1485 1486 if not self._match(TokenType.EQ): 1487 self._retreat(index) 1488 return None 1489 1490 return self.expression( 1491 exp.Property, 1492 this=key.to_dot() if isinstance(key, exp.Column) else key, 1493 value=self._parse_column() or self._parse_var(any_token=True), 1494 ) 1495 1496 def _parse_stored(self) -> exp.FileFormatProperty: 1497 self._match(TokenType.ALIAS) 1498 1499 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1500 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1501 1502 return self.expression( 1503 exp.FileFormatProperty, 1504 this=self.expression( 1505 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1506 ) 1507 if input_format or output_format 1508 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1509 ) 1510 1511 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1512 self._match(TokenType.EQ) 1513 self._match(TokenType.ALIAS) 1514 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1515 1516 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1517 properties = [] 1518 while True: 1519 if before: 1520 prop = self._parse_property_before() 1521 else: 1522 prop = self._parse_property() 1523 1524 if not prop: 1525 break 1526 for p in ensure_list(prop): 1527 properties.append(p) 1528 1529 if properties: 1530 return self.expression(exp.Properties, expressions=properties) 1531 1532 return None 1533 1534 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1535 return self.expression( 1536 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1537 ) 1538 1539 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1540 if self._index >= 2: 1541 pre_volatile_token = self._tokens[self._index - 2] 1542 else: 1543 pre_volatile_token = None 1544 1545 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1546 return exp.VolatileProperty() 1547 1548 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1549 1550 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1551 self._match_pair(TokenType.EQ, TokenType.ON) 1552 1553 prop = self.expression(exp.WithSystemVersioningProperty) 1554 if self._match(TokenType.L_PAREN): 1555 self._match_text_seq("HISTORY_TABLE", "=") 1556 prop.set("this", self._parse_table_parts()) 1557 1558 if self._match(TokenType.COMMA): 1559 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1560 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1561 1562 self._match_r_paren() 1563 1564 return prop 1565 1566 def _parse_with_property( 1567 self, 1568 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1569 if self._match(TokenType.L_PAREN, advance=False): 1570 return self._parse_wrapped_csv(self._parse_property) 1571 1572 if self._match_text_seq("JOURNAL"): 1573 return self._parse_withjournaltable() 1574 1575 if self._match_text_seq("DATA"): 1576 return self._parse_withdata(no=False) 1577 elif self._match_text_seq("NO", "DATA"): 1578 return self._parse_withdata(no=True) 1579 1580 if not self._next: 1581 return None 1582 1583 return self._parse_withisolatedloading() 1584 1585 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1586 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1587 self._match(TokenType.EQ) 1588 1589 user = self._parse_id_var() 1590 self._match(TokenType.PARAMETER) 1591 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1592 1593 if not user or not host: 1594 return None 1595 1596 return exp.DefinerProperty(this=f"{user}@{host}") 1597 1598 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1599 self._match(TokenType.TABLE) 1600 self._match(TokenType.EQ) 1601 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1602 1603 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1604 return self.expression(exp.LogProperty, no=no) 1605 1606 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1607 return self.expression(exp.JournalProperty, **kwargs) 1608 1609 def _parse_checksum(self) -> exp.ChecksumProperty: 1610 self._match(TokenType.EQ) 1611 1612 on = None 1613 if self._match(TokenType.ON): 1614 on = True 1615 elif self._match_text_seq("OFF"): 1616 on = False 1617 1618 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1619 1620 def _parse_cluster(self) -> exp.Cluster: 1621 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1622 1623 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1624 self._match_text_seq("BY") 1625 1626 self._match_l_paren() 1627 expressions = self._parse_csv(self._parse_column) 1628 self._match_r_paren() 1629 1630 if self._match_text_seq("SORTED", "BY"): 1631 self._match_l_paren() 1632 sorted_by = self._parse_csv(self._parse_ordered) 1633 self._match_r_paren() 1634 else: 1635 sorted_by = None 1636 1637 self._match(TokenType.INTO) 1638 buckets = self._parse_number() 1639 self._match_text_seq("BUCKETS") 1640 1641 return self.expression( 1642 exp.ClusteredByProperty, 1643 expressions=expressions, 1644 sorted_by=sorted_by, 1645 buckets=buckets, 1646 ) 1647 1648 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1649 if not self._match_text_seq("GRANTS"): 1650 self._retreat(self._index - 1) 1651 return None 1652 1653 return self.expression(exp.CopyGrantsProperty) 1654 1655 def _parse_freespace(self) -> exp.FreespaceProperty: 1656 self._match(TokenType.EQ) 1657 return self.expression( 1658 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1659 ) 1660 1661 def _parse_mergeblockratio( 1662 self, no: bool = False, default: bool = False 1663 ) -> exp.MergeBlockRatioProperty: 1664 if self._match(TokenType.EQ): 1665 return self.expression( 1666 exp.MergeBlockRatioProperty, 1667 this=self._parse_number(), 1668 percent=self._match(TokenType.PERCENT), 1669 ) 1670 1671 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1672 1673 def _parse_datablocksize( 1674 self, 1675 default: t.Optional[bool] = None, 1676 minimum: t.Optional[bool] = None, 1677 maximum: t.Optional[bool] = None, 1678 ) -> exp.DataBlocksizeProperty: 1679 self._match(TokenType.EQ) 1680 size = self._parse_number() 1681 1682 units = None 1683 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1684 units = self._prev.text 1685 1686 return self.expression( 1687 exp.DataBlocksizeProperty, 1688 size=size, 1689 units=units, 1690 default=default, 1691 minimum=minimum, 1692 maximum=maximum, 1693 ) 1694 1695 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1696 self._match(TokenType.EQ) 1697 always = self._match_text_seq("ALWAYS") 1698 manual = self._match_text_seq("MANUAL") 1699 never = self._match_text_seq("NEVER") 1700 default = self._match_text_seq("DEFAULT") 1701 1702 autotemp = None 1703 if self._match_text_seq("AUTOTEMP"): 1704 autotemp = self._parse_schema() 1705 1706 return self.expression( 1707 exp.BlockCompressionProperty, 1708 always=always, 1709 manual=manual, 1710 never=never, 1711 default=default, 1712 autotemp=autotemp, 1713 ) 1714 1715 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1716 no = self._match_text_seq("NO") 1717 concurrent = self._match_text_seq("CONCURRENT") 1718 self._match_text_seq("ISOLATED", "LOADING") 1719 for_all = self._match_text_seq("FOR", "ALL") 1720 for_insert = self._match_text_seq("FOR", "INSERT") 1721 for_none = self._match_text_seq("FOR", "NONE") 1722 return self.expression( 1723 exp.IsolatedLoadingProperty, 1724 no=no, 1725 concurrent=concurrent, 1726 for_all=for_all, 1727 for_insert=for_insert, 1728 for_none=for_none, 1729 ) 1730 1731 def _parse_locking(self) -> exp.LockingProperty: 1732 if self._match(TokenType.TABLE): 1733 kind = "TABLE" 1734 elif self._match(TokenType.VIEW): 1735 kind = "VIEW" 1736 elif self._match(TokenType.ROW): 1737 kind = "ROW" 1738 elif self._match_text_seq("DATABASE"): 1739 kind = "DATABASE" 1740 else: 1741 kind = None 1742 1743 if kind in ("DATABASE", "TABLE", "VIEW"): 1744 this = self._parse_table_parts() 1745 else: 1746 this = None 1747 1748 if self._match(TokenType.FOR): 1749 for_or_in = "FOR" 1750 elif self._match(TokenType.IN): 1751 for_or_in = "IN" 1752 else: 1753 for_or_in = None 1754 1755 if self._match_text_seq("ACCESS"): 1756 lock_type = "ACCESS" 1757 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1758 lock_type = "EXCLUSIVE" 1759 elif self._match_text_seq("SHARE"): 1760 lock_type = "SHARE" 1761 elif self._match_text_seq("READ"): 1762 lock_type = "READ" 1763 elif self._match_text_seq("WRITE"): 1764 lock_type = "WRITE" 1765 elif self._match_text_seq("CHECKSUM"): 1766 lock_type = "CHECKSUM" 1767 else: 1768 lock_type = None 1769 1770 override = self._match_text_seq("OVERRIDE") 1771 1772 return self.expression( 1773 exp.LockingProperty, 1774 this=this, 1775 kind=kind, 1776 for_or_in=for_or_in, 1777 lock_type=lock_type, 1778 override=override, 1779 ) 1780 1781 def _parse_partition_by(self) -> t.List[exp.Expression]: 1782 if self._match(TokenType.PARTITION_BY): 1783 return self._parse_csv(self._parse_conjunction) 1784 return [] 1785 1786 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1787 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1788 if self._match_text_seq("MINVALUE"): 1789 return exp.var("MINVALUE") 1790 if self._match_text_seq("MAXVALUE"): 1791 return exp.var("MAXVALUE") 1792 return self._parse_bitwise() 1793 1794 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1795 expression = None 1796 from_expressions = None 1797 to_expressions = None 1798 1799 if self._match(TokenType.IN): 1800 this = self._parse_wrapped_csv(self._parse_bitwise) 1801 elif self._match(TokenType.FROM): 1802 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1803 self._match_text_seq("TO") 1804 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1805 elif self._match_text_seq("WITH", "(", "MODULUS"): 1806 this = self._parse_number() 1807 self._match_text_seq(",", "REMAINDER") 1808 expression = self._parse_number() 1809 self._match_r_paren() 1810 else: 1811 self.raise_error("Failed to parse partition bound spec.") 1812 1813 return self.expression( 1814 exp.PartitionBoundSpec, 1815 this=this, 1816 expression=expression, 1817 from_expressions=from_expressions, 1818 to_expressions=to_expressions, 1819 ) 1820 1821 # https://www.postgresql.org/docs/current/sql-createtable.html 1822 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1823 if not self._match_text_seq("OF"): 1824 self._retreat(self._index - 1) 1825 return None 1826 1827 this = self._parse_table(schema=True) 1828 1829 if self._match(TokenType.DEFAULT): 1830 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1831 elif self._match_text_seq("FOR", "VALUES"): 1832 expression = self._parse_partition_bound_spec() 1833 else: 1834 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1835 1836 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1837 1838 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1839 self._match(TokenType.EQ) 1840 return self.expression( 1841 exp.PartitionedByProperty, 1842 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1843 ) 1844 1845 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1846 if self._match_text_seq("AND", "STATISTICS"): 1847 statistics = True 1848 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1849 statistics = False 1850 else: 1851 statistics = None 1852 1853 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1854 1855 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1856 if self._match_text_seq("PRIMARY", "INDEX"): 1857 return exp.NoPrimaryIndexProperty() 1858 return None 1859 1860 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1861 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1862 return exp.OnCommitProperty() 1863 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1864 return exp.OnCommitProperty(delete=True) 1865 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1866 1867 def _parse_distkey(self) -> exp.DistKeyProperty: 1868 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1869 1870 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1871 table = self._parse_table(schema=True) 1872 1873 options = [] 1874 while self._match_texts(("INCLUDING", "EXCLUDING")): 1875 this = self._prev.text.upper() 1876 1877 id_var = self._parse_id_var() 1878 if not id_var: 1879 return None 1880 1881 options.append( 1882 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1883 ) 1884 1885 return self.expression(exp.LikeProperty, this=table, expressions=options) 1886 1887 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1888 return self.expression( 1889 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1890 ) 1891 1892 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1893 self._match(TokenType.EQ) 1894 return self.expression( 1895 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1896 ) 1897 1898 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1899 self._match_text_seq("WITH", "CONNECTION") 1900 return self.expression( 1901 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1902 ) 1903 1904 def _parse_returns(self) -> exp.ReturnsProperty: 1905 value: t.Optional[exp.Expression] 1906 is_table = self._match(TokenType.TABLE) 1907 1908 if is_table: 1909 if self._match(TokenType.LT): 1910 value = self.expression( 1911 exp.Schema, 1912 this="TABLE", 1913 expressions=self._parse_csv(self._parse_struct_types), 1914 ) 1915 if not self._match(TokenType.GT): 1916 self.raise_error("Expecting >") 1917 else: 1918 value = self._parse_schema(exp.var("TABLE")) 1919 else: 1920 value = self._parse_types() 1921 1922 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1923 1924 def _parse_describe(self) -> exp.Describe: 1925 kind = self._match_set(self.CREATABLES) and self._prev.text 1926 this = self._parse_table(schema=True) 1927 properties = self._parse_properties() 1928 expressions = properties.expressions if properties else None 1929 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1930 1931 def _parse_insert(self) -> exp.Insert: 1932 comments = ensure_list(self._prev_comments) 1933 overwrite = self._match(TokenType.OVERWRITE) 1934 ignore = self._match(TokenType.IGNORE) 1935 local = self._match_text_seq("LOCAL") 1936 alternative = None 1937 1938 if self._match_text_seq("DIRECTORY"): 1939 this: t.Optional[exp.Expression] = self.expression( 1940 exp.Directory, 1941 this=self._parse_var_or_string(), 1942 local=local, 1943 row_format=self._parse_row_format(match_row=True), 1944 ) 1945 else: 1946 if self._match(TokenType.OR): 1947 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1948 1949 self._match(TokenType.INTO) 1950 comments += ensure_list(self._prev_comments) 1951 self._match(TokenType.TABLE) 1952 this = self._parse_table(schema=True) 1953 1954 returning = self._parse_returning() 1955 1956 return self.expression( 1957 exp.Insert, 1958 comments=comments, 1959 this=this, 1960 by_name=self._match_text_seq("BY", "NAME"), 1961 exists=self._parse_exists(), 1962 partition=self._parse_partition(), 1963 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1964 and self._parse_conjunction(), 1965 expression=self._parse_ddl_select(), 1966 conflict=self._parse_on_conflict(), 1967 returning=returning or self._parse_returning(), 1968 overwrite=overwrite, 1969 alternative=alternative, 1970 ignore=ignore, 1971 ) 1972 1973 def _parse_kill(self) -> exp.Kill: 1974 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1975 1976 return self.expression( 1977 exp.Kill, 1978 this=self._parse_primary(), 1979 kind=kind, 1980 ) 1981 1982 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1983 conflict = self._match_text_seq("ON", "CONFLICT") 1984 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1985 1986 if not conflict and not duplicate: 1987 return None 1988 1989 nothing = None 1990 expressions = None 1991 key = None 1992 constraint = None 1993 1994 if conflict: 1995 if self._match_text_seq("ON", "CONSTRAINT"): 1996 constraint = self._parse_id_var() 1997 else: 1998 key = self._parse_csv(self._parse_value) 1999 2000 self._match_text_seq("DO") 2001 if self._match_text_seq("NOTHING"): 2002 nothing = True 2003 else: 2004 self._match(TokenType.UPDATE) 2005 self._match(TokenType.SET) 2006 expressions = self._parse_csv(self._parse_equality) 2007 2008 return self.expression( 2009 exp.OnConflict, 2010 duplicate=duplicate, 2011 expressions=expressions, 2012 nothing=nothing, 2013 key=key, 2014 constraint=constraint, 2015 ) 2016 2017 def _parse_returning(self) -> t.Optional[exp.Returning]: 2018 if not self._match(TokenType.RETURNING): 2019 return None 2020 return self.expression( 2021 exp.Returning, 2022 expressions=self._parse_csv(self._parse_expression), 2023 into=self._match(TokenType.INTO) and self._parse_table_part(), 2024 ) 2025 2026 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2027 if not self._match(TokenType.FORMAT): 2028 return None 2029 return self._parse_row_format() 2030 2031 def _parse_row_format( 2032 self, match_row: bool = False 2033 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2034 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2035 return None 2036 2037 if self._match_text_seq("SERDE"): 2038 this = self._parse_string() 2039 2040 serde_properties = None 2041 if self._match(TokenType.SERDE_PROPERTIES): 2042 serde_properties = self.expression( 2043 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2044 ) 2045 2046 return self.expression( 2047 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2048 ) 2049 2050 self._match_text_seq("DELIMITED") 2051 2052 kwargs = {} 2053 2054 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2055 kwargs["fields"] = self._parse_string() 2056 if self._match_text_seq("ESCAPED", "BY"): 2057 kwargs["escaped"] = self._parse_string() 2058 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2059 kwargs["collection_items"] = self._parse_string() 2060 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2061 kwargs["map_keys"] = self._parse_string() 2062 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2063 kwargs["lines"] = self._parse_string() 2064 if self._match_text_seq("NULL", "DEFINED", "AS"): 2065 kwargs["null"] = self._parse_string() 2066 2067 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2068 2069 def _parse_load(self) -> exp.LoadData | exp.Command: 2070 if self._match_text_seq("DATA"): 2071 local = self._match_text_seq("LOCAL") 2072 self._match_text_seq("INPATH") 2073 inpath = self._parse_string() 2074 overwrite = self._match(TokenType.OVERWRITE) 2075 self._match_pair(TokenType.INTO, TokenType.TABLE) 2076 2077 return self.expression( 2078 exp.LoadData, 2079 this=self._parse_table(schema=True), 2080 local=local, 2081 overwrite=overwrite, 2082 inpath=inpath, 2083 partition=self._parse_partition(), 2084 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2085 serde=self._match_text_seq("SERDE") and self._parse_string(), 2086 ) 2087 return self._parse_as_command(self._prev) 2088 2089 def _parse_delete(self) -> exp.Delete: 2090 # This handles MySQL's "Multiple-Table Syntax" 2091 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2092 tables = None 2093 comments = self._prev_comments 2094 if not self._match(TokenType.FROM, advance=False): 2095 tables = self._parse_csv(self._parse_table) or None 2096 2097 returning = self._parse_returning() 2098 2099 return self.expression( 2100 exp.Delete, 2101 comments=comments, 2102 tables=tables, 2103 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2104 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2105 where=self._parse_where(), 2106 returning=returning or self._parse_returning(), 2107 limit=self._parse_limit(), 2108 ) 2109 2110 def _parse_update(self) -> exp.Update: 2111 comments = self._prev_comments 2112 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2113 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2114 returning = self._parse_returning() 2115 return self.expression( 2116 exp.Update, 2117 comments=comments, 2118 **{ # type: ignore 2119 "this": this, 2120 "expressions": expressions, 2121 "from": self._parse_from(joins=True), 2122 "where": self._parse_where(), 2123 "returning": returning or self._parse_returning(), 2124 "order": self._parse_order(), 2125 "limit": self._parse_limit(), 2126 }, 2127 ) 2128 2129 def _parse_uncache(self) -> exp.Uncache: 2130 if not self._match(TokenType.TABLE): 2131 self.raise_error("Expecting TABLE after UNCACHE") 2132 2133 return self.expression( 2134 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2135 ) 2136 2137 def _parse_cache(self) -> exp.Cache: 2138 lazy = self._match_text_seq("LAZY") 2139 self._match(TokenType.TABLE) 2140 table = self._parse_table(schema=True) 2141 2142 options = [] 2143 if self._match_text_seq("OPTIONS"): 2144 self._match_l_paren() 2145 k = self._parse_string() 2146 self._match(TokenType.EQ) 2147 v = self._parse_string() 2148 options = [k, v] 2149 self._match_r_paren() 2150 2151 self._match(TokenType.ALIAS) 2152 return self.expression( 2153 exp.Cache, 2154 this=table, 2155 lazy=lazy, 2156 options=options, 2157 expression=self._parse_select(nested=True), 2158 ) 2159 2160 def _parse_partition(self) -> t.Optional[exp.Partition]: 2161 if not self._match(TokenType.PARTITION): 2162 return None 2163 2164 return self.expression( 2165 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2166 ) 2167 2168 def _parse_value(self) -> exp.Tuple: 2169 if self._match(TokenType.L_PAREN): 2170 expressions = self._parse_csv(self._parse_conjunction) 2171 self._match_r_paren() 2172 return self.expression(exp.Tuple, expressions=expressions) 2173 2174 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2175 # https://prestodb.io/docs/current/sql/values.html 2176 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2177 2178 def _parse_projections(self) -> t.List[exp.Expression]: 2179 return self._parse_expressions() 2180 2181 def _parse_select( 2182 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2183 ) -> t.Optional[exp.Expression]: 2184 cte = self._parse_with() 2185 2186 if cte: 2187 this = self._parse_statement() 2188 2189 if not this: 2190 self.raise_error("Failed to parse any statement following CTE") 2191 return cte 2192 2193 if "with" in this.arg_types: 2194 this.set("with", cte) 2195 else: 2196 self.raise_error(f"{this.key} does not support CTE") 2197 this = cte 2198 2199 return this 2200 2201 # duckdb supports leading with FROM x 2202 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2203 2204 if self._match(TokenType.SELECT): 2205 comments = self._prev_comments 2206 2207 hint = self._parse_hint() 2208 all_ = self._match(TokenType.ALL) 2209 distinct = self._match_set(self.DISTINCT_TOKENS) 2210 2211 kind = ( 2212 self._match(TokenType.ALIAS) 2213 and self._match_texts(("STRUCT", "VALUE")) 2214 and self._prev.text 2215 ) 2216 2217 if distinct: 2218 distinct = self.expression( 2219 exp.Distinct, 2220 on=self._parse_value() if self._match(TokenType.ON) else None, 2221 ) 2222 2223 if all_ and distinct: 2224 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2225 2226 limit = self._parse_limit(top=True) 2227 projections = self._parse_projections() 2228 2229 this = self.expression( 2230 exp.Select, 2231 kind=kind, 2232 hint=hint, 2233 distinct=distinct, 2234 expressions=projections, 2235 limit=limit, 2236 ) 2237 this.comments = comments 2238 2239 into = self._parse_into() 2240 if into: 2241 this.set("into", into) 2242 2243 if not from_: 2244 from_ = self._parse_from() 2245 2246 if from_: 2247 this.set("from", from_) 2248 2249 this = self._parse_query_modifiers(this) 2250 elif (table or nested) and self._match(TokenType.L_PAREN): 2251 if self._match(TokenType.PIVOT): 2252 this = self._parse_simplified_pivot() 2253 elif self._match(TokenType.FROM): 2254 this = exp.select("*").from_( 2255 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2256 ) 2257 else: 2258 this = self._parse_table() if table else self._parse_select(nested=True) 2259 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2260 2261 self._match_r_paren() 2262 2263 # We return early here so that the UNION isn't attached to the subquery by the 2264 # following call to _parse_set_operations, but instead becomes the parent node 2265 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2266 elif self._match(TokenType.VALUES): 2267 this = self.expression( 2268 exp.Values, 2269 expressions=self._parse_csv(self._parse_value), 2270 alias=self._parse_table_alias(), 2271 ) 2272 elif from_: 2273 this = exp.select("*").from_(from_.this, copy=False) 2274 else: 2275 this = None 2276 2277 return self._parse_set_operations(this) 2278 2279 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2280 if not skip_with_token and not self._match(TokenType.WITH): 2281 return None 2282 2283 comments = self._prev_comments 2284 recursive = self._match(TokenType.RECURSIVE) 2285 2286 expressions = [] 2287 while True: 2288 expressions.append(self._parse_cte()) 2289 2290 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2291 break 2292 else: 2293 self._match(TokenType.WITH) 2294 2295 return self.expression( 2296 exp.With, comments=comments, expressions=expressions, recursive=recursive 2297 ) 2298 2299 def _parse_cte(self) -> exp.CTE: 2300 alias = self._parse_table_alias() 2301 if not alias or not alias.this: 2302 self.raise_error("Expected CTE to have alias") 2303 2304 self._match(TokenType.ALIAS) 2305 return self.expression( 2306 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2307 ) 2308 2309 def _parse_table_alias( 2310 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2311 ) -> t.Optional[exp.TableAlias]: 2312 any_token = self._match(TokenType.ALIAS) 2313 alias = ( 2314 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2315 or self._parse_string_as_identifier() 2316 ) 2317 2318 index = self._index 2319 if self._match(TokenType.L_PAREN): 2320 columns = self._parse_csv(self._parse_function_parameter) 2321 self._match_r_paren() if columns else self._retreat(index) 2322 else: 2323 columns = None 2324 2325 if not alias and not columns: 2326 return None 2327 2328 return self.expression(exp.TableAlias, this=alias, columns=columns) 2329 2330 def _parse_subquery( 2331 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2332 ) -> t.Optional[exp.Subquery]: 2333 if not this: 2334 return None 2335 2336 return self.expression( 2337 exp.Subquery, 2338 this=this, 2339 pivots=self._parse_pivots(), 2340 alias=self._parse_table_alias() if parse_alias else None, 2341 ) 2342 2343 def _parse_query_modifiers( 2344 self, this: t.Optional[exp.Expression] 2345 ) -> t.Optional[exp.Expression]: 2346 if isinstance(this, self.MODIFIABLES): 2347 for join in iter(self._parse_join, None): 2348 this.append("joins", join) 2349 for lateral in iter(self._parse_lateral, None): 2350 this.append("laterals", lateral) 2351 2352 while True: 2353 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2354 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2355 key, expression = parser(self) 2356 2357 if expression: 2358 this.set(key, expression) 2359 if key == "limit": 2360 offset = expression.args.pop("offset", None) 2361 if offset: 2362 this.set("offset", exp.Offset(expression=offset)) 2363 continue 2364 break 2365 return this 2366 2367 def _parse_hint(self) -> t.Optional[exp.Hint]: 2368 if self._match(TokenType.HINT): 2369 hints = [] 2370 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2371 hints.extend(hint) 2372 2373 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2374 self.raise_error("Expected */ after HINT") 2375 2376 return self.expression(exp.Hint, expressions=hints) 2377 2378 return None 2379 2380 def _parse_into(self) -> t.Optional[exp.Into]: 2381 if not self._match(TokenType.INTO): 2382 return None 2383 2384 temp = self._match(TokenType.TEMPORARY) 2385 unlogged = self._match_text_seq("UNLOGGED") 2386 self._match(TokenType.TABLE) 2387 2388 return self.expression( 2389 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2390 ) 2391 2392 def _parse_from( 2393 self, joins: bool = False, skip_from_token: bool = False 2394 ) -> t.Optional[exp.From]: 2395 if not skip_from_token and not self._match(TokenType.FROM): 2396 return None 2397 2398 return self.expression( 2399 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2400 ) 2401 2402 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2403 if not self._match(TokenType.MATCH_RECOGNIZE): 2404 return None 2405 2406 self._match_l_paren() 2407 2408 partition = self._parse_partition_by() 2409 order = self._parse_order() 2410 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2411 2412 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2413 rows = exp.var("ONE ROW PER MATCH") 2414 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2415 text = "ALL ROWS PER MATCH" 2416 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2417 text += f" SHOW EMPTY MATCHES" 2418 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2419 text += f" OMIT EMPTY MATCHES" 2420 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2421 text += f" WITH UNMATCHED ROWS" 2422 rows = exp.var(text) 2423 else: 2424 rows = None 2425 2426 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2427 text = "AFTER MATCH SKIP" 2428 if self._match_text_seq("PAST", "LAST", "ROW"): 2429 text += f" PAST LAST ROW" 2430 elif self._match_text_seq("TO", "NEXT", "ROW"): 2431 text += f" TO NEXT ROW" 2432 elif self._match_text_seq("TO", "FIRST"): 2433 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2434 elif self._match_text_seq("TO", "LAST"): 2435 text += f" TO LAST {self._advance_any().text}" # type: ignore 2436 after = exp.var(text) 2437 else: 2438 after = None 2439 2440 if self._match_text_seq("PATTERN"): 2441 self._match_l_paren() 2442 2443 if not self._curr: 2444 self.raise_error("Expecting )", self._curr) 2445 2446 paren = 1 2447 start = self._curr 2448 2449 while self._curr and paren > 0: 2450 if self._curr.token_type == TokenType.L_PAREN: 2451 paren += 1 2452 if self._curr.token_type == TokenType.R_PAREN: 2453 paren -= 1 2454 2455 end = self._prev 2456 self._advance() 2457 2458 if paren > 0: 2459 self.raise_error("Expecting )", self._curr) 2460 2461 pattern = exp.var(self._find_sql(start, end)) 2462 else: 2463 pattern = None 2464 2465 define = ( 2466 self._parse_csv( 2467 lambda: self.expression( 2468 exp.Alias, 2469 alias=self._parse_id_var(any_token=True), 2470 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2471 ) 2472 ) 2473 if self._match_text_seq("DEFINE") 2474 else None 2475 ) 2476 2477 self._match_r_paren() 2478 2479 return self.expression( 2480 exp.MatchRecognize, 2481 partition_by=partition, 2482 order=order, 2483 measures=measures, 2484 rows=rows, 2485 after=after, 2486 pattern=pattern, 2487 define=define, 2488 alias=self._parse_table_alias(), 2489 ) 2490 2491 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2492 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2493 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2494 2495 if outer_apply or cross_apply: 2496 this = self._parse_select(table=True) 2497 view = None 2498 outer = not cross_apply 2499 elif self._match(TokenType.LATERAL): 2500 this = self._parse_select(table=True) 2501 view = self._match(TokenType.VIEW) 2502 outer = self._match(TokenType.OUTER) 2503 else: 2504 return None 2505 2506 if not this: 2507 this = ( 2508 self._parse_unnest() 2509 or self._parse_function() 2510 or self._parse_id_var(any_token=False) 2511 ) 2512 2513 while self._match(TokenType.DOT): 2514 this = exp.Dot( 2515 this=this, 2516 expression=self._parse_function() or self._parse_id_var(any_token=False), 2517 ) 2518 2519 if view: 2520 table = self._parse_id_var(any_token=False) 2521 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2522 table_alias: t.Optional[exp.TableAlias] = self.expression( 2523 exp.TableAlias, this=table, columns=columns 2524 ) 2525 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2526 # We move the alias from the lateral's child node to the lateral itself 2527 table_alias = this.args["alias"].pop() 2528 else: 2529 table_alias = self._parse_table_alias() 2530 2531 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2532 2533 def _parse_join_parts( 2534 self, 2535 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2536 return ( 2537 self._match_set(self.JOIN_METHODS) and self._prev, 2538 self._match_set(self.JOIN_SIDES) and self._prev, 2539 self._match_set(self.JOIN_KINDS) and self._prev, 2540 ) 2541 2542 def _parse_join( 2543 self, skip_join_token: bool = False, parse_bracket: bool = False 2544 ) -> t.Optional[exp.Join]: 2545 if self._match(TokenType.COMMA): 2546 return self.expression(exp.Join, this=self._parse_table()) 2547 2548 index = self._index 2549 method, side, kind = self._parse_join_parts() 2550 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2551 join = self._match(TokenType.JOIN) 2552 2553 if not skip_join_token and not join: 2554 self._retreat(index) 2555 kind = None 2556 method = None 2557 side = None 2558 2559 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2560 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2561 2562 if not skip_join_token and not join and not outer_apply and not cross_apply: 2563 return None 2564 2565 if outer_apply: 2566 side = Token(TokenType.LEFT, "LEFT") 2567 2568 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2569 2570 if method: 2571 kwargs["method"] = method.text 2572 if side: 2573 kwargs["side"] = side.text 2574 if kind: 2575 kwargs["kind"] = kind.text 2576 if hint: 2577 kwargs["hint"] = hint 2578 2579 if self._match(TokenType.ON): 2580 kwargs["on"] = self._parse_conjunction() 2581 elif self._match(TokenType.USING): 2582 kwargs["using"] = self._parse_wrapped_id_vars() 2583 elif not (kind and kind.token_type == TokenType.CROSS): 2584 index = self._index 2585 join = self._parse_join() 2586 2587 if join and self._match(TokenType.ON): 2588 kwargs["on"] = self._parse_conjunction() 2589 elif join and self._match(TokenType.USING): 2590 kwargs["using"] = self._parse_wrapped_id_vars() 2591 else: 2592 join = None 2593 self._retreat(index) 2594 2595 kwargs["this"].set("joins", [join] if join else None) 2596 2597 comments = [c for token in (method, side, kind) if token for c in token.comments] 2598 return self.expression(exp.Join, comments=comments, **kwargs) 2599 2600 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2601 this = self._parse_conjunction() 2602 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2603 return this 2604 2605 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2606 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2607 2608 return this 2609 2610 def _parse_index( 2611 self, 2612 index: t.Optional[exp.Expression] = None, 2613 ) -> t.Optional[exp.Index]: 2614 if index: 2615 unique = None 2616 primary = None 2617 amp = None 2618 2619 self._match(TokenType.ON) 2620 self._match(TokenType.TABLE) # hive 2621 table = self._parse_table_parts(schema=True) 2622 else: 2623 unique = self._match(TokenType.UNIQUE) 2624 primary = self._match_text_seq("PRIMARY") 2625 amp = self._match_text_seq("AMP") 2626 2627 if not self._match(TokenType.INDEX): 2628 return None 2629 2630 index = self._parse_id_var() 2631 table = None 2632 2633 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2634 2635 if self._match(TokenType.L_PAREN, advance=False): 2636 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2637 else: 2638 columns = None 2639 2640 return self.expression( 2641 exp.Index, 2642 this=index, 2643 table=table, 2644 using=using, 2645 columns=columns, 2646 unique=unique, 2647 primary=primary, 2648 amp=amp, 2649 partition_by=self._parse_partition_by(), 2650 where=self._parse_where(), 2651 ) 2652 2653 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2654 hints: t.List[exp.Expression] = [] 2655 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2656 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2657 hints.append( 2658 self.expression( 2659 exp.WithTableHint, 2660 expressions=self._parse_csv( 2661 lambda: self._parse_function() or self._parse_var(any_token=True) 2662 ), 2663 ) 2664 ) 2665 self._match_r_paren() 2666 else: 2667 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2668 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2669 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2670 2671 self._match_texts(("INDEX", "KEY")) 2672 if self._match(TokenType.FOR): 2673 hint.set("target", self._advance_any() and self._prev.text.upper()) 2674 2675 hint.set("expressions", self._parse_wrapped_id_vars()) 2676 hints.append(hint) 2677 2678 return hints or None 2679 2680 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2681 return ( 2682 (not schema and self._parse_function(optional_parens=False)) 2683 or self._parse_id_var(any_token=False) 2684 or self._parse_string_as_identifier() 2685 or self._parse_placeholder() 2686 ) 2687 2688 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2689 catalog = None 2690 db = None 2691 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2692 2693 while self._match(TokenType.DOT): 2694 if catalog: 2695 # This allows nesting the table in arbitrarily many dot expressions if needed 2696 table = self.expression( 2697 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2698 ) 2699 else: 2700 catalog = db 2701 db = table 2702 table = self._parse_table_part(schema=schema) or "" 2703 2704 if not table: 2705 self.raise_error(f"Expected table name but got {self._curr}") 2706 2707 return self.expression( 2708 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2709 ) 2710 2711 def _parse_table( 2712 self, 2713 schema: bool = False, 2714 joins: bool = False, 2715 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2716 parse_bracket: bool = False, 2717 ) -> t.Optional[exp.Expression]: 2718 lateral = self._parse_lateral() 2719 if lateral: 2720 return lateral 2721 2722 unnest = self._parse_unnest() 2723 if unnest: 2724 return unnest 2725 2726 values = self._parse_derived_table_values() 2727 if values: 2728 return values 2729 2730 subquery = self._parse_select(table=True) 2731 if subquery: 2732 if not subquery.args.get("pivots"): 2733 subquery.set("pivots", self._parse_pivots()) 2734 return subquery 2735 2736 bracket = parse_bracket and self._parse_bracket(None) 2737 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2738 this = t.cast( 2739 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2740 ) 2741 2742 if schema: 2743 return self._parse_schema(this=this) 2744 2745 version = self._parse_version() 2746 2747 if version: 2748 this.set("version", version) 2749 2750 if self.ALIAS_POST_TABLESAMPLE: 2751 table_sample = self._parse_table_sample() 2752 2753 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2754 if alias: 2755 this.set("alias", alias) 2756 2757 if self._match_text_seq("AT"): 2758 this.set("index", self._parse_id_var()) 2759 2760 this.set("hints", self._parse_table_hints()) 2761 2762 if not this.args.get("pivots"): 2763 this.set("pivots", self._parse_pivots()) 2764 2765 if not self.ALIAS_POST_TABLESAMPLE: 2766 table_sample = self._parse_table_sample() 2767 2768 if table_sample: 2769 table_sample.set("this", this) 2770 this = table_sample 2771 2772 if joins: 2773 for join in iter(self._parse_join, None): 2774 this.append("joins", join) 2775 2776 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2777 this.set("ordinality", True) 2778 this.set("alias", self._parse_table_alias()) 2779 2780 return this 2781 2782 def _parse_version(self) -> t.Optional[exp.Version]: 2783 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2784 this = "TIMESTAMP" 2785 elif self._match(TokenType.VERSION_SNAPSHOT): 2786 this = "VERSION" 2787 else: 2788 return None 2789 2790 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2791 kind = self._prev.text.upper() 2792 start = self._parse_bitwise() 2793 self._match_texts(("TO", "AND")) 2794 end = self._parse_bitwise() 2795 expression: t.Optional[exp.Expression] = self.expression( 2796 exp.Tuple, expressions=[start, end] 2797 ) 2798 elif self._match_text_seq("CONTAINED", "IN"): 2799 kind = "CONTAINED IN" 2800 expression = self.expression( 2801 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2802 ) 2803 elif self._match(TokenType.ALL): 2804 kind = "ALL" 2805 expression = None 2806 else: 2807 self._match_text_seq("AS", "OF") 2808 kind = "AS OF" 2809 expression = self._parse_type() 2810 2811 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2812 2813 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2814 if not self._match(TokenType.UNNEST): 2815 return None 2816 2817 expressions = self._parse_wrapped_csv(self._parse_equality) 2818 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2819 2820 alias = self._parse_table_alias() if with_alias else None 2821 2822 if alias: 2823 if self.UNNEST_COLUMN_ONLY: 2824 if alias.args.get("columns"): 2825 self.raise_error("Unexpected extra column alias in unnest.") 2826 2827 alias.set("columns", [alias.this]) 2828 alias.set("this", None) 2829 2830 columns = alias.args.get("columns") or [] 2831 if offset and len(expressions) < len(columns): 2832 offset = columns.pop() 2833 2834 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2835 self._match(TokenType.ALIAS) 2836 offset = self._parse_id_var( 2837 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2838 ) or exp.to_identifier("offset") 2839 2840 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2841 2842 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2843 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2844 if not is_derived and not self._match(TokenType.VALUES): 2845 return None 2846 2847 expressions = self._parse_csv(self._parse_value) 2848 alias = self._parse_table_alias() 2849 2850 if is_derived: 2851 self._match_r_paren() 2852 2853 return self.expression( 2854 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2855 ) 2856 2857 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2858 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2859 as_modifier and self._match_text_seq("USING", "SAMPLE") 2860 ): 2861 return None 2862 2863 bucket_numerator = None 2864 bucket_denominator = None 2865 bucket_field = None 2866 percent = None 2867 rows = None 2868 size = None 2869 seed = None 2870 2871 kind = ( 2872 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2873 ) 2874 method = self._parse_var(tokens=(TokenType.ROW,)) 2875 2876 matched_l_paren = self._match(TokenType.L_PAREN) 2877 2878 if self.TABLESAMPLE_CSV: 2879 num = None 2880 expressions = self._parse_csv(self._parse_primary) 2881 else: 2882 expressions = None 2883 num = ( 2884 self._parse_factor() 2885 if self._match(TokenType.NUMBER, advance=False) 2886 else self._parse_primary() or self._parse_placeholder() 2887 ) 2888 2889 if self._match_text_seq("BUCKET"): 2890 bucket_numerator = self._parse_number() 2891 self._match_text_seq("OUT", "OF") 2892 bucket_denominator = bucket_denominator = self._parse_number() 2893 self._match(TokenType.ON) 2894 bucket_field = self._parse_field() 2895 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2896 percent = num 2897 elif self._match(TokenType.ROWS): 2898 rows = num 2899 elif num: 2900 size = num 2901 2902 if matched_l_paren: 2903 self._match_r_paren() 2904 2905 if self._match(TokenType.L_PAREN): 2906 method = self._parse_var() 2907 seed = self._match(TokenType.COMMA) and self._parse_number() 2908 self._match_r_paren() 2909 elif self._match_texts(("SEED", "REPEATABLE")): 2910 seed = self._parse_wrapped(self._parse_number) 2911 2912 return self.expression( 2913 exp.TableSample, 2914 expressions=expressions, 2915 method=method, 2916 bucket_numerator=bucket_numerator, 2917 bucket_denominator=bucket_denominator, 2918 bucket_field=bucket_field, 2919 percent=percent, 2920 rows=rows, 2921 size=size, 2922 seed=seed, 2923 kind=kind, 2924 ) 2925 2926 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2927 return list(iter(self._parse_pivot, None)) or None 2928 2929 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2930 return list(iter(self._parse_join, None)) or None 2931 2932 # https://duckdb.org/docs/sql/statements/pivot 2933 def _parse_simplified_pivot(self) -> exp.Pivot: 2934 def _parse_on() -> t.Optional[exp.Expression]: 2935 this = self._parse_bitwise() 2936 return self._parse_in(this) if self._match(TokenType.IN) else this 2937 2938 this = self._parse_table() 2939 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2940 using = self._match(TokenType.USING) and self._parse_csv( 2941 lambda: self._parse_alias(self._parse_function()) 2942 ) 2943 group = self._parse_group() 2944 return self.expression( 2945 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2946 ) 2947 2948 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2949 index = self._index 2950 include_nulls = None 2951 2952 if self._match(TokenType.PIVOT): 2953 unpivot = False 2954 elif self._match(TokenType.UNPIVOT): 2955 unpivot = True 2956 2957 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2958 if self._match_text_seq("INCLUDE", "NULLS"): 2959 include_nulls = True 2960 elif self._match_text_seq("EXCLUDE", "NULLS"): 2961 include_nulls = False 2962 else: 2963 return None 2964 2965 expressions = [] 2966 field = None 2967 2968 if not self._match(TokenType.L_PAREN): 2969 self._retreat(index) 2970 return None 2971 2972 if unpivot: 2973 expressions = self._parse_csv(self._parse_column) 2974 else: 2975 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2976 2977 if not expressions: 2978 self.raise_error("Failed to parse PIVOT's aggregation list") 2979 2980 if not self._match(TokenType.FOR): 2981 self.raise_error("Expecting FOR") 2982 2983 value = self._parse_column() 2984 2985 if not self._match(TokenType.IN): 2986 self.raise_error("Expecting IN") 2987 2988 field = self._parse_in(value, alias=True) 2989 2990 self._match_r_paren() 2991 2992 pivot = self.expression( 2993 exp.Pivot, 2994 expressions=expressions, 2995 field=field, 2996 unpivot=unpivot, 2997 include_nulls=include_nulls, 2998 ) 2999 3000 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3001 pivot.set("alias", self._parse_table_alias()) 3002 3003 if not unpivot: 3004 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3005 3006 columns: t.List[exp.Expression] = [] 3007 for fld in pivot.args["field"].expressions: 3008 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3009 for name in names: 3010 if self.PREFIXED_PIVOT_COLUMNS: 3011 name = f"{name}_{field_name}" if name else field_name 3012 else: 3013 name = f"{field_name}_{name}" if name else field_name 3014 3015 columns.append(exp.to_identifier(name)) 3016 3017 pivot.set("columns", columns) 3018 3019 return pivot 3020 3021 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3022 return [agg.alias for agg in aggregations] 3023 3024 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3025 if not skip_where_token and not self._match(TokenType.WHERE): 3026 return None 3027 3028 return self.expression( 3029 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3030 ) 3031 3032 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3033 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3034 return None 3035 3036 elements = defaultdict(list) 3037 3038 if self._match(TokenType.ALL): 3039 return self.expression(exp.Group, all=True) 3040 3041 while True: 3042 expressions = self._parse_csv(self._parse_conjunction) 3043 if expressions: 3044 elements["expressions"].extend(expressions) 3045 3046 grouping_sets = self._parse_grouping_sets() 3047 if grouping_sets: 3048 elements["grouping_sets"].extend(grouping_sets) 3049 3050 rollup = None 3051 cube = None 3052 totals = None 3053 3054 index = self._index 3055 with_ = self._match(TokenType.WITH) 3056 if self._match(TokenType.ROLLUP): 3057 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3058 elements["rollup"].extend(ensure_list(rollup)) 3059 3060 if self._match(TokenType.CUBE): 3061 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3062 elements["cube"].extend(ensure_list(cube)) 3063 3064 if self._match_text_seq("TOTALS"): 3065 totals = True 3066 elements["totals"] = True # type: ignore 3067 3068 if not (grouping_sets or rollup or cube or totals): 3069 if with_: 3070 self._retreat(index) 3071 break 3072 3073 return self.expression(exp.Group, **elements) # type: ignore 3074 3075 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3076 if not self._match(TokenType.GROUPING_SETS): 3077 return None 3078 3079 return self._parse_wrapped_csv(self._parse_grouping_set) 3080 3081 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3082 if self._match(TokenType.L_PAREN): 3083 grouping_set = self._parse_csv(self._parse_column) 3084 self._match_r_paren() 3085 return self.expression(exp.Tuple, expressions=grouping_set) 3086 3087 return self._parse_column() 3088 3089 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3090 if not skip_having_token and not self._match(TokenType.HAVING): 3091 return None 3092 return self.expression(exp.Having, this=self._parse_conjunction()) 3093 3094 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3095 if not self._match(TokenType.QUALIFY): 3096 return None 3097 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3098 3099 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3100 if skip_start_token: 3101 start = None 3102 elif self._match(TokenType.START_WITH): 3103 start = self._parse_conjunction() 3104 else: 3105 return None 3106 3107 self._match(TokenType.CONNECT_BY) 3108 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3109 exp.Prior, this=self._parse_bitwise() 3110 ) 3111 connect = self._parse_conjunction() 3112 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3113 3114 if not start and self._match(TokenType.START_WITH): 3115 start = self._parse_conjunction() 3116 3117 return self.expression(exp.Connect, start=start, connect=connect) 3118 3119 def _parse_order( 3120 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3121 ) -> t.Optional[exp.Expression]: 3122 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3123 return this 3124 3125 return self.expression( 3126 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3127 ) 3128 3129 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3130 if not self._match(token): 3131 return None 3132 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3133 3134 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3135 this = parse_method() if parse_method else self._parse_conjunction() 3136 3137 asc = self._match(TokenType.ASC) 3138 desc = self._match(TokenType.DESC) or (asc and False) 3139 3140 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3141 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3142 3143 nulls_first = is_nulls_first or False 3144 explicitly_null_ordered = is_nulls_first or is_nulls_last 3145 3146 if ( 3147 not explicitly_null_ordered 3148 and ( 3149 (not desc and self.NULL_ORDERING == "nulls_are_small") 3150 or (desc and self.NULL_ORDERING != "nulls_are_small") 3151 ) 3152 and self.NULL_ORDERING != "nulls_are_last" 3153 ): 3154 nulls_first = True 3155 3156 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3157 3158 def _parse_limit( 3159 self, this: t.Optional[exp.Expression] = None, top: bool = False 3160 ) -> t.Optional[exp.Expression]: 3161 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3162 comments = self._prev_comments 3163 if top: 3164 limit_paren = self._match(TokenType.L_PAREN) 3165 expression = self._parse_term() if limit_paren else self._parse_number() 3166 3167 if limit_paren: 3168 self._match_r_paren() 3169 else: 3170 expression = self._parse_term() 3171 3172 if self._match(TokenType.COMMA): 3173 offset = expression 3174 expression = self._parse_term() 3175 else: 3176 offset = None 3177 3178 limit_exp = self.expression( 3179 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3180 ) 3181 3182 return limit_exp 3183 3184 if self._match(TokenType.FETCH): 3185 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3186 direction = self._prev.text if direction else "FIRST" 3187 3188 count = self._parse_field(tokens=self.FETCH_TOKENS) 3189 percent = self._match(TokenType.PERCENT) 3190 3191 self._match_set((TokenType.ROW, TokenType.ROWS)) 3192 3193 only = self._match_text_seq("ONLY") 3194 with_ties = self._match_text_seq("WITH", "TIES") 3195 3196 if only and with_ties: 3197 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3198 3199 return self.expression( 3200 exp.Fetch, 3201 direction=direction, 3202 count=count, 3203 percent=percent, 3204 with_ties=with_ties, 3205 ) 3206 3207 return this 3208 3209 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3210 if not self._match(TokenType.OFFSET): 3211 return this 3212 3213 count = self._parse_term() 3214 self._match_set((TokenType.ROW, TokenType.ROWS)) 3215 return self.expression(exp.Offset, this=this, expression=count) 3216 3217 def _parse_locks(self) -> t.List[exp.Lock]: 3218 locks = [] 3219 while True: 3220 if self._match_text_seq("FOR", "UPDATE"): 3221 update = True 3222 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3223 "LOCK", "IN", "SHARE", "MODE" 3224 ): 3225 update = False 3226 else: 3227 break 3228 3229 expressions = None 3230 if self._match_text_seq("OF"): 3231 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3232 3233 wait: t.Optional[bool | exp.Expression] = None 3234 if self._match_text_seq("NOWAIT"): 3235 wait = True 3236 elif self._match_text_seq("WAIT"): 3237 wait = self._parse_primary() 3238 elif self._match_text_seq("SKIP", "LOCKED"): 3239 wait = False 3240 3241 locks.append( 3242 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3243 ) 3244 3245 return locks 3246 3247 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3248 if not self._match_set(self.SET_OPERATIONS): 3249 return this 3250 3251 token_type = self._prev.token_type 3252 3253 if token_type == TokenType.UNION: 3254 expression = exp.Union 3255 elif token_type == TokenType.EXCEPT: 3256 expression = exp.Except 3257 else: 3258 expression = exp.Intersect 3259 3260 return self.expression( 3261 expression, 3262 comments=self._prev.comments, 3263 this=this, 3264 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3265 by_name=self._match_text_seq("BY", "NAME"), 3266 expression=self._parse_set_operations(self._parse_select(nested=True)), 3267 ) 3268 3269 def _parse_expression(self) -> t.Optional[exp.Expression]: 3270 return self._parse_alias(self._parse_conjunction()) 3271 3272 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3273 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3274 3275 def _parse_equality(self) -> t.Optional[exp.Expression]: 3276 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3277 3278 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3279 return self._parse_tokens(self._parse_range, self.COMPARISON) 3280 3281 def _parse_range(self) -> t.Optional[exp.Expression]: 3282 this = self._parse_bitwise() 3283 negate = self._match(TokenType.NOT) 3284 3285 if self._match_set(self.RANGE_PARSERS): 3286 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3287 if not expression: 3288 return this 3289 3290 this = expression 3291 elif self._match(TokenType.ISNULL): 3292 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3293 3294 # Postgres supports ISNULL and NOTNULL for conditions. 3295 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3296 if self._match(TokenType.NOTNULL): 3297 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3298 this = self.expression(exp.Not, this=this) 3299 3300 if negate: 3301 this = self.expression(exp.Not, this=this) 3302 3303 if self._match(TokenType.IS): 3304 this = self._parse_is(this) 3305 3306 return this 3307 3308 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3309 index = self._index - 1 3310 negate = self._match(TokenType.NOT) 3311 3312 if self._match_text_seq("DISTINCT", "FROM"): 3313 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3314 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3315 3316 expression = self._parse_null() or self._parse_boolean() 3317 if not expression: 3318 self._retreat(index) 3319 return None 3320 3321 this = self.expression(exp.Is, this=this, expression=expression) 3322 return self.expression(exp.Not, this=this) if negate else this 3323 3324 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3325 unnest = self._parse_unnest(with_alias=False) 3326 if unnest: 3327 this = self.expression(exp.In, this=this, unnest=unnest) 3328 elif self._match(TokenType.L_PAREN): 3329 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3330 3331 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3332 this = self.expression(exp.In, this=this, query=expressions[0]) 3333 else: 3334 this = self.expression(exp.In, this=this, expressions=expressions) 3335 3336 self._match_r_paren(this) 3337 else: 3338 this = self.expression(exp.In, this=this, field=self._parse_field()) 3339 3340 return this 3341 3342 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3343 low = self._parse_bitwise() 3344 self._match(TokenType.AND) 3345 high = self._parse_bitwise() 3346 return self.expression(exp.Between, this=this, low=low, high=high) 3347 3348 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3349 if not self._match(TokenType.ESCAPE): 3350 return this 3351 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3352 3353 def _parse_interval(self) -> t.Optional[exp.Interval]: 3354 index = self._index 3355 3356 if not self._match(TokenType.INTERVAL): 3357 return None 3358 3359 if self._match(TokenType.STRING, advance=False): 3360 this = self._parse_primary() 3361 else: 3362 this = self._parse_term() 3363 3364 if not this: 3365 self._retreat(index) 3366 return None 3367 3368 unit = self._parse_function() or self._parse_var(any_token=True) 3369 3370 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3371 # each INTERVAL expression into this canonical form so it's easy to transpile 3372 if this and this.is_number: 3373 this = exp.Literal.string(this.name) 3374 elif this and this.is_string: 3375 parts = this.name.split() 3376 3377 if len(parts) == 2: 3378 if unit: 3379 # This is not actually a unit, it's something else (e.g. a "window side") 3380 unit = None 3381 self._retreat(self._index - 1) 3382 3383 this = exp.Literal.string(parts[0]) 3384 unit = self.expression(exp.Var, this=parts[1]) 3385 3386 return self.expression(exp.Interval, this=this, unit=unit) 3387 3388 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3389 this = self._parse_term() 3390 3391 while True: 3392 if self._match_set(self.BITWISE): 3393 this = self.expression( 3394 self.BITWISE[self._prev.token_type], 3395 this=this, 3396 expression=self._parse_term(), 3397 ) 3398 elif self.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3399 this = self.expression( 3400 exp.DPipe, 3401 this=this, 3402 expression=self._parse_term(), 3403 safe=not self.STRICT_STRING_CONCAT, 3404 ) 3405 elif self._match(TokenType.DQMARK): 3406 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3407 elif self._match_pair(TokenType.LT, TokenType.LT): 3408 this = self.expression( 3409 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3410 ) 3411 elif self._match_pair(TokenType.GT, TokenType.GT): 3412 this = self.expression( 3413 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3414 ) 3415 else: 3416 break 3417 3418 return this 3419 3420 def _parse_term(self) -> t.Optional[exp.Expression]: 3421 return self._parse_tokens(self._parse_factor, self.TERM) 3422 3423 def _parse_factor(self) -> t.Optional[exp.Expression]: 3424 if self.EXPONENT: 3425 factor = self._parse_tokens(self._parse_exponent, self.FACTOR) 3426 else: 3427 factor = self._parse_tokens(self._parse_unary, self.FACTOR) 3428 if isinstance(factor, exp.Div): 3429 factor.args["typed"] = self.TYPED_DIVISION 3430 factor.args["safe"] = self.SAFE_DIVISION 3431 return factor 3432 3433 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3434 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3435 3436 def _parse_unary(self) -> t.Optional[exp.Expression]: 3437 if self._match_set(self.UNARY_PARSERS): 3438 return self.UNARY_PARSERS[self._prev.token_type](self) 3439 return self._parse_at_time_zone(self._parse_type()) 3440 3441 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3442 interval = parse_interval and self._parse_interval() 3443 if interval: 3444 return interval 3445 3446 index = self._index 3447 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3448 this = self._parse_column() 3449 3450 if data_type: 3451 if isinstance(this, exp.Literal): 3452 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3453 if parser: 3454 return parser(self, this, data_type) 3455 return self.expression(exp.Cast, this=this, to=data_type) 3456 if not data_type.expressions: 3457 self._retreat(index) 3458 return self._parse_column() 3459 return self._parse_column_ops(data_type) 3460 3461 return this and self._parse_column_ops(this) 3462 3463 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3464 this = self._parse_type() 3465 if not this: 3466 return None 3467 3468 return self.expression( 3469 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3470 ) 3471 3472 def _parse_types( 3473 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3474 ) -> t.Optional[exp.Expression]: 3475 index = self._index 3476 3477 prefix = self._match_text_seq("SYSUDTLIB", ".") 3478 3479 if not self._match_set(self.TYPE_TOKENS): 3480 identifier = allow_identifiers and self._parse_id_var( 3481 any_token=False, tokens=(TokenType.VAR,) 3482 ) 3483 3484 if identifier: 3485 tokens = self._tokenizer.tokenize(identifier.name) 3486 3487 if len(tokens) != 1: 3488 self.raise_error("Unexpected identifier", self._prev) 3489 3490 if tokens[0].token_type in self.TYPE_TOKENS: 3491 self._prev = tokens[0] 3492 elif self.SUPPORTS_USER_DEFINED_TYPES: 3493 type_name = identifier.name 3494 3495 while self._match(TokenType.DOT): 3496 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3497 3498 return exp.DataType.build(type_name, udt=True) 3499 else: 3500 return None 3501 else: 3502 return None 3503 3504 type_token = self._prev.token_type 3505 3506 if type_token == TokenType.PSEUDO_TYPE: 3507 return self.expression(exp.PseudoType, this=self._prev.text) 3508 3509 if type_token == TokenType.OBJECT_IDENTIFIER: 3510 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3511 3512 nested = type_token in self.NESTED_TYPE_TOKENS 3513 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3514 expressions = None 3515 maybe_func = False 3516 3517 if self._match(TokenType.L_PAREN): 3518 if is_struct: 3519 expressions = self._parse_csv(self._parse_struct_types) 3520 elif nested: 3521 expressions = self._parse_csv( 3522 lambda: self._parse_types( 3523 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3524 ) 3525 ) 3526 elif type_token in self.ENUM_TYPE_TOKENS: 3527 expressions = self._parse_csv(self._parse_equality) 3528 else: 3529 expressions = self._parse_csv(self._parse_type_size) 3530 3531 if not expressions or not self._match(TokenType.R_PAREN): 3532 self._retreat(index) 3533 return None 3534 3535 maybe_func = True 3536 3537 this: t.Optional[exp.Expression] = None 3538 values: t.Optional[t.List[exp.Expression]] = None 3539 3540 if nested and self._match(TokenType.LT): 3541 if is_struct: 3542 expressions = self._parse_csv(self._parse_struct_types) 3543 else: 3544 expressions = self._parse_csv( 3545 lambda: self._parse_types( 3546 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3547 ) 3548 ) 3549 3550 if not self._match(TokenType.GT): 3551 self.raise_error("Expecting >") 3552 3553 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3554 values = self._parse_csv(self._parse_conjunction) 3555 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3556 3557 if type_token in self.TIMESTAMPS: 3558 if self._match_text_seq("WITH", "TIME", "ZONE"): 3559 maybe_func = False 3560 tz_type = ( 3561 exp.DataType.Type.TIMETZ 3562 if type_token in self.TIMES 3563 else exp.DataType.Type.TIMESTAMPTZ 3564 ) 3565 this = exp.DataType(this=tz_type, expressions=expressions) 3566 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3567 maybe_func = False 3568 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3569 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3570 maybe_func = False 3571 elif type_token == TokenType.INTERVAL: 3572 unit = self._parse_var() 3573 3574 if self._match_text_seq("TO"): 3575 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3576 else: 3577 span = None 3578 3579 if span or not unit: 3580 this = self.expression( 3581 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3582 ) 3583 else: 3584 this = self.expression(exp.Interval, unit=unit) 3585 3586 if maybe_func and check_func: 3587 index2 = self._index 3588 peek = self._parse_string() 3589 3590 if not peek: 3591 self._retreat(index) 3592 return None 3593 3594 self._retreat(index2) 3595 3596 if not this: 3597 if self._match_text_seq("UNSIGNED"): 3598 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3599 if not unsigned_type_token: 3600 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3601 3602 type_token = unsigned_type_token or type_token 3603 3604 this = exp.DataType( 3605 this=exp.DataType.Type[type_token.value], 3606 expressions=expressions, 3607 nested=nested, 3608 values=values, 3609 prefix=prefix, 3610 ) 3611 3612 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3613 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3614 3615 return this 3616 3617 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3618 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3619 self._match(TokenType.COLON) 3620 return self._parse_column_def(this) 3621 3622 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3623 if not self._match_text_seq("AT", "TIME", "ZONE"): 3624 return this 3625 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3626 3627 def _parse_column(self) -> t.Optional[exp.Expression]: 3628 this = self._parse_field() 3629 if isinstance(this, exp.Identifier): 3630 this = self.expression(exp.Column, this=this) 3631 elif not this: 3632 return self._parse_bracket(this) 3633 return self._parse_column_ops(this) 3634 3635 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3636 this = self._parse_bracket(this) 3637 3638 while self._match_set(self.COLUMN_OPERATORS): 3639 op_token = self._prev.token_type 3640 op = self.COLUMN_OPERATORS.get(op_token) 3641 3642 if op_token == TokenType.DCOLON: 3643 field = self._parse_types() 3644 if not field: 3645 self.raise_error("Expected type") 3646 elif op and self._curr: 3647 self._advance() 3648 value = self._prev.text 3649 field = ( 3650 exp.Literal.number(value) 3651 if self._prev.token_type == TokenType.NUMBER 3652 else exp.Literal.string(value) 3653 ) 3654 else: 3655 field = self._parse_field(anonymous_func=True, any_token=True) 3656 3657 if isinstance(field, exp.Func): 3658 # bigquery allows function calls like x.y.count(...) 3659 # SAFE.SUBSTR(...) 3660 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3661 this = self._replace_columns_with_dots(this) 3662 3663 if op: 3664 this = op(self, this, field) 3665 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3666 this = self.expression( 3667 exp.Column, 3668 this=field, 3669 table=this.this, 3670 db=this.args.get("table"), 3671 catalog=this.args.get("db"), 3672 ) 3673 else: 3674 this = self.expression(exp.Dot, this=this, expression=field) 3675 this = self._parse_bracket(this) 3676 return this 3677 3678 def _parse_primary(self) -> t.Optional[exp.Expression]: 3679 if self._match_set(self.PRIMARY_PARSERS): 3680 token_type = self._prev.token_type 3681 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3682 3683 if token_type == TokenType.STRING: 3684 expressions = [primary] 3685 while self._match(TokenType.STRING): 3686 expressions.append(exp.Literal.string(self._prev.text)) 3687 3688 if len(expressions) > 1: 3689 return self.expression(exp.Concat, expressions=expressions) 3690 3691 return primary 3692 3693 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3694 return exp.Literal.number(f"0.{self._prev.text}") 3695 3696 if self._match(TokenType.L_PAREN): 3697 comments = self._prev_comments 3698 query = self._parse_select() 3699 3700 if query: 3701 expressions = [query] 3702 else: 3703 expressions = self._parse_expressions() 3704 3705 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3706 3707 if isinstance(this, exp.Subqueryable): 3708 this = self._parse_set_operations( 3709 self._parse_subquery(this=this, parse_alias=False) 3710 ) 3711 elif len(expressions) > 1: 3712 this = self.expression(exp.Tuple, expressions=expressions) 3713 else: 3714 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3715 3716 if this: 3717 this.add_comments(comments) 3718 3719 self._match_r_paren(expression=this) 3720 return this 3721 3722 return None 3723 3724 def _parse_field( 3725 self, 3726 any_token: bool = False, 3727 tokens: t.Optional[t.Collection[TokenType]] = None, 3728 anonymous_func: bool = False, 3729 ) -> t.Optional[exp.Expression]: 3730 return ( 3731 self._parse_primary() 3732 or self._parse_function(anonymous=anonymous_func) 3733 or self._parse_id_var(any_token=any_token, tokens=tokens) 3734 ) 3735 3736 def _parse_function( 3737 self, 3738 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3739 anonymous: bool = False, 3740 optional_parens: bool = True, 3741 ) -> t.Optional[exp.Expression]: 3742 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3743 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3744 fn_syntax = False 3745 if ( 3746 self._match(TokenType.L_BRACE, advance=False) 3747 and self._next 3748 and self._next.text.upper() == "FN" 3749 ): 3750 self._advance(2) 3751 fn_syntax = True 3752 3753 func = self._parse_function_call( 3754 functions=functions, anonymous=anonymous, optional_parens=optional_parens 3755 ) 3756 3757 if fn_syntax: 3758 self._match(TokenType.R_BRACE) 3759 3760 return func 3761 3762 def _parse_function_call( 3763 self, 3764 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3765 anonymous: bool = False, 3766 optional_parens: bool = True, 3767 ) -> t.Optional[exp.Expression]: 3768 if not self._curr: 3769 return None 3770 3771 comments = self._curr.comments 3772 token_type = self._curr.token_type 3773 this = self._curr.text 3774 upper = this.upper() 3775 3776 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3777 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3778 self._advance() 3779 return parser(self) 3780 3781 if not self._next or self._next.token_type != TokenType.L_PAREN: 3782 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3783 self._advance() 3784 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3785 3786 return None 3787 3788 if token_type not in self.FUNC_TOKENS: 3789 return None 3790 3791 self._advance(2) 3792 3793 parser = self.FUNCTION_PARSERS.get(upper) 3794 if parser and not anonymous: 3795 this = parser(self) 3796 else: 3797 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3798 3799 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3800 this = self.expression(subquery_predicate, this=self._parse_select()) 3801 self._match_r_paren() 3802 return this 3803 3804 if functions is None: 3805 functions = self.FUNCTIONS 3806 3807 function = functions.get(upper) 3808 3809 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3810 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3811 3812 if function and not anonymous: 3813 func = self.validate_expression(function(args), args) 3814 if not self.NORMALIZE_FUNCTIONS: 3815 func.meta["name"] = this 3816 this = func 3817 else: 3818 this = self.expression(exp.Anonymous, this=this, expressions=args) 3819 3820 if isinstance(this, exp.Expression): 3821 this.add_comments(comments) 3822 3823 self._match_r_paren(this) 3824 return self._parse_window(this) 3825 3826 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3827 return self._parse_column_def(self._parse_id_var()) 3828 3829 def _parse_user_defined_function( 3830 self, kind: t.Optional[TokenType] = None 3831 ) -> t.Optional[exp.Expression]: 3832 this = self._parse_id_var() 3833 3834 while self._match(TokenType.DOT): 3835 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3836 3837 if not self._match(TokenType.L_PAREN): 3838 return this 3839 3840 expressions = self._parse_csv(self._parse_function_parameter) 3841 self._match_r_paren() 3842 return self.expression( 3843 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3844 ) 3845 3846 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3847 literal = self._parse_primary() 3848 if literal: 3849 return self.expression(exp.Introducer, this=token.text, expression=literal) 3850 3851 return self.expression(exp.Identifier, this=token.text) 3852 3853 def _parse_session_parameter(self) -> exp.SessionParameter: 3854 kind = None 3855 this = self._parse_id_var() or self._parse_primary() 3856 3857 if this and self._match(TokenType.DOT): 3858 kind = this.name 3859 this = self._parse_var() or self._parse_primary() 3860 3861 return self.expression(exp.SessionParameter, this=this, kind=kind) 3862 3863 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3864 index = self._index 3865 3866 if self._match(TokenType.L_PAREN): 3867 expressions = t.cast( 3868 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3869 ) 3870 3871 if not self._match(TokenType.R_PAREN): 3872 self._retreat(index) 3873 else: 3874 expressions = [self._parse_id_var()] 3875 3876 if self._match_set(self.LAMBDAS): 3877 return self.LAMBDAS[self._prev.token_type](self, expressions) 3878 3879 self._retreat(index) 3880 3881 this: t.Optional[exp.Expression] 3882 3883 if self._match(TokenType.DISTINCT): 3884 this = self.expression( 3885 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3886 ) 3887 else: 3888 this = self._parse_select_or_expression(alias=alias) 3889 3890 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3891 3892 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3893 index = self._index 3894 3895 if not self.errors: 3896 try: 3897 if self._parse_select(nested=True): 3898 return this 3899 except ParseError: 3900 pass 3901 finally: 3902 self.errors.clear() 3903 self._retreat(index) 3904 3905 if not self._match(TokenType.L_PAREN): 3906 return this 3907 3908 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3909 3910 self._match_r_paren() 3911 return self.expression(exp.Schema, this=this, expressions=args) 3912 3913 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3914 return self._parse_column_def(self._parse_field(any_token=True)) 3915 3916 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3917 # column defs are not really columns, they're identifiers 3918 if isinstance(this, exp.Column): 3919 this = this.this 3920 3921 kind = self._parse_types(schema=True) 3922 3923 if self._match_text_seq("FOR", "ORDINALITY"): 3924 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3925 3926 constraints: t.List[exp.Expression] = [] 3927 3928 if not kind and self._match(TokenType.ALIAS): 3929 constraints.append( 3930 self.expression( 3931 exp.ComputedColumnConstraint, 3932 this=self._parse_conjunction(), 3933 persisted=self._match_text_seq("PERSISTED"), 3934 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3935 ) 3936 ) 3937 3938 while True: 3939 constraint = self._parse_column_constraint() 3940 if not constraint: 3941 break 3942 constraints.append(constraint) 3943 3944 if not kind and not constraints: 3945 return this 3946 3947 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3948 3949 def _parse_auto_increment( 3950 self, 3951 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3952 start = None 3953 increment = None 3954 3955 if self._match(TokenType.L_PAREN, advance=False): 3956 args = self._parse_wrapped_csv(self._parse_bitwise) 3957 start = seq_get(args, 0) 3958 increment = seq_get(args, 1) 3959 elif self._match_text_seq("START"): 3960 start = self._parse_bitwise() 3961 self._match_text_seq("INCREMENT") 3962 increment = self._parse_bitwise() 3963 3964 if start and increment: 3965 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3966 3967 return exp.AutoIncrementColumnConstraint() 3968 3969 def _parse_compress(self) -> exp.CompressColumnConstraint: 3970 if self._match(TokenType.L_PAREN, advance=False): 3971 return self.expression( 3972 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3973 ) 3974 3975 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3976 3977 def _parse_generated_as_identity( 3978 self, 3979 ) -> ( 3980 exp.GeneratedAsIdentityColumnConstraint 3981 | exp.ComputedColumnConstraint 3982 | exp.GeneratedAsRowColumnConstraint 3983 ): 3984 if self._match_text_seq("BY", "DEFAULT"): 3985 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3986 this = self.expression( 3987 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3988 ) 3989 else: 3990 self._match_text_seq("ALWAYS") 3991 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3992 3993 self._match(TokenType.ALIAS) 3994 3995 if self._match_text_seq("ROW"): 3996 start = self._match_text_seq("START") 3997 if not start: 3998 self._match(TokenType.END) 3999 hidden = self._match_text_seq("HIDDEN") 4000 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4001 4002 identity = self._match_text_seq("IDENTITY") 4003 4004 if self._match(TokenType.L_PAREN): 4005 if self._match(TokenType.START_WITH): 4006 this.set("start", self._parse_bitwise()) 4007 if self._match_text_seq("INCREMENT", "BY"): 4008 this.set("increment", self._parse_bitwise()) 4009 if self._match_text_seq("MINVALUE"): 4010 this.set("minvalue", self._parse_bitwise()) 4011 if self._match_text_seq("MAXVALUE"): 4012 this.set("maxvalue", self._parse_bitwise()) 4013 4014 if self._match_text_seq("CYCLE"): 4015 this.set("cycle", True) 4016 elif self._match_text_seq("NO", "CYCLE"): 4017 this.set("cycle", False) 4018 4019 if not identity: 4020 this.set("expression", self._parse_bitwise()) 4021 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4022 args = self._parse_csv(self._parse_bitwise) 4023 this.set("start", seq_get(args, 0)) 4024 this.set("increment", seq_get(args, 1)) 4025 4026 self._match_r_paren() 4027 4028 return this 4029 4030 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4031 self._match_text_seq("LENGTH") 4032 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4033 4034 def _parse_not_constraint( 4035 self, 4036 ) -> t.Optional[exp.Expression]: 4037 if self._match_text_seq("NULL"): 4038 return self.expression(exp.NotNullColumnConstraint) 4039 if self._match_text_seq("CASESPECIFIC"): 4040 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4041 if self._match_text_seq("FOR", "REPLICATION"): 4042 return self.expression(exp.NotForReplicationColumnConstraint) 4043 return None 4044 4045 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4046 if self._match(TokenType.CONSTRAINT): 4047 this = self._parse_id_var() 4048 else: 4049 this = None 4050 4051 if self._match_texts(self.CONSTRAINT_PARSERS): 4052 return self.expression( 4053 exp.ColumnConstraint, 4054 this=this, 4055 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4056 ) 4057 4058 return this 4059 4060 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4061 if not self._match(TokenType.CONSTRAINT): 4062 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4063 4064 this = self._parse_id_var() 4065 expressions = [] 4066 4067 while True: 4068 constraint = self._parse_unnamed_constraint() or self._parse_function() 4069 if not constraint: 4070 break 4071 expressions.append(constraint) 4072 4073 return self.expression(exp.Constraint, this=this, expressions=expressions) 4074 4075 def _parse_unnamed_constraint( 4076 self, constraints: t.Optional[t.Collection[str]] = None 4077 ) -> t.Optional[exp.Expression]: 4078 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4079 constraints or self.CONSTRAINT_PARSERS 4080 ): 4081 return None 4082 4083 constraint = self._prev.text.upper() 4084 if constraint not in self.CONSTRAINT_PARSERS: 4085 self.raise_error(f"No parser found for schema constraint {constraint}.") 4086 4087 return self.CONSTRAINT_PARSERS[constraint](self) 4088 4089 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4090 self._match_text_seq("KEY") 4091 return self.expression( 4092 exp.UniqueColumnConstraint, 4093 this=self._parse_schema(self._parse_id_var(any_token=False)), 4094 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4095 ) 4096 4097 def _parse_key_constraint_options(self) -> t.List[str]: 4098 options = [] 4099 while True: 4100 if not self._curr: 4101 break 4102 4103 if self._match(TokenType.ON): 4104 action = None 4105 on = self._advance_any() and self._prev.text 4106 4107 if self._match_text_seq("NO", "ACTION"): 4108 action = "NO ACTION" 4109 elif self._match_text_seq("CASCADE"): 4110 action = "CASCADE" 4111 elif self._match_text_seq("RESTRICT"): 4112 action = "RESTRICT" 4113 elif self._match_pair(TokenType.SET, TokenType.NULL): 4114 action = "SET NULL" 4115 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4116 action = "SET DEFAULT" 4117 else: 4118 self.raise_error("Invalid key constraint") 4119 4120 options.append(f"ON {on} {action}") 4121 elif self._match_text_seq("NOT", "ENFORCED"): 4122 options.append("NOT ENFORCED") 4123 elif self._match_text_seq("DEFERRABLE"): 4124 options.append("DEFERRABLE") 4125 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4126 options.append("INITIALLY DEFERRED") 4127 elif self._match_text_seq("NORELY"): 4128 options.append("NORELY") 4129 elif self._match_text_seq("MATCH", "FULL"): 4130 options.append("MATCH FULL") 4131 else: 4132 break 4133 4134 return options 4135 4136 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4137 if match and not self._match(TokenType.REFERENCES): 4138 return None 4139 4140 expressions = None 4141 this = self._parse_table(schema=True) 4142 options = self._parse_key_constraint_options() 4143 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4144 4145 def _parse_foreign_key(self) -> exp.ForeignKey: 4146 expressions = self._parse_wrapped_id_vars() 4147 reference = self._parse_references() 4148 options = {} 4149 4150 while self._match(TokenType.ON): 4151 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4152 self.raise_error("Expected DELETE or UPDATE") 4153 4154 kind = self._prev.text.lower() 4155 4156 if self._match_text_seq("NO", "ACTION"): 4157 action = "NO ACTION" 4158 elif self._match(TokenType.SET): 4159 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4160 action = "SET " + self._prev.text.upper() 4161 else: 4162 self._advance() 4163 action = self._prev.text.upper() 4164 4165 options[kind] = action 4166 4167 return self.expression( 4168 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4169 ) 4170 4171 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4172 return self._parse_field() 4173 4174 def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint: 4175 self._match(TokenType.TIMESTAMP_SNAPSHOT) 4176 4177 id_vars = self._parse_wrapped_id_vars() 4178 return self.expression( 4179 exp.PeriodForSystemTimeConstraint, 4180 this=seq_get(id_vars, 0), 4181 expression=seq_get(id_vars, 1), 4182 ) 4183 4184 def _parse_primary_key( 4185 self, wrapped_optional: bool = False, in_props: bool = False 4186 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4187 desc = ( 4188 self._match_set((TokenType.ASC, TokenType.DESC)) 4189 and self._prev.token_type == TokenType.DESC 4190 ) 4191 4192 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4193 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4194 4195 expressions = self._parse_wrapped_csv( 4196 self._parse_primary_key_part, optional=wrapped_optional 4197 ) 4198 options = self._parse_key_constraint_options() 4199 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4200 4201 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4202 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4203 return this 4204 4205 bracket_kind = self._prev.token_type 4206 4207 if self._match(TokenType.COLON): 4208 expressions: t.List[exp.Expression] = [ 4209 self.expression(exp.Slice, expression=self._parse_conjunction()) 4210 ] 4211 else: 4212 expressions = self._parse_csv( 4213 lambda: self._parse_slice( 4214 self._parse_alias(self._parse_conjunction(), explicit=True) 4215 ) 4216 ) 4217 4218 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4219 self.raise_error("Expected ]") 4220 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4221 self.raise_error("Expected }") 4222 4223 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4224 if bracket_kind == TokenType.L_BRACE: 4225 this = self.expression(exp.Struct, expressions=expressions) 4226 elif not this or this.name.upper() == "ARRAY": 4227 this = self.expression(exp.Array, expressions=expressions) 4228 else: 4229 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4230 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4231 4232 self._add_comments(this) 4233 return self._parse_bracket(this) 4234 4235 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4236 if self._match(TokenType.COLON): 4237 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4238 return this 4239 4240 def _parse_case(self) -> t.Optional[exp.Expression]: 4241 ifs = [] 4242 default = None 4243 4244 comments = self._prev_comments 4245 expression = self._parse_conjunction() 4246 4247 while self._match(TokenType.WHEN): 4248 this = self._parse_conjunction() 4249 self._match(TokenType.THEN) 4250 then = self._parse_conjunction() 4251 ifs.append(self.expression(exp.If, this=this, true=then)) 4252 4253 if self._match(TokenType.ELSE): 4254 default = self._parse_conjunction() 4255 4256 if not self._match(TokenType.END): 4257 self.raise_error("Expected END after CASE", self._prev) 4258 4259 return self._parse_window( 4260 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4261 ) 4262 4263 def _parse_if(self) -> t.Optional[exp.Expression]: 4264 if self._match(TokenType.L_PAREN): 4265 args = self._parse_csv(self._parse_conjunction) 4266 this = self.validate_expression(exp.If.from_arg_list(args), args) 4267 self._match_r_paren() 4268 else: 4269 index = self._index - 1 4270 condition = self._parse_conjunction() 4271 4272 if not condition: 4273 self._retreat(index) 4274 return None 4275 4276 self._match(TokenType.THEN) 4277 true = self._parse_conjunction() 4278 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4279 self._match(TokenType.END) 4280 this = self.expression(exp.If, this=condition, true=true, false=false) 4281 4282 return self._parse_window(this) 4283 4284 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4285 if not self._match_text_seq("VALUE", "FOR"): 4286 self._retreat(self._index - 1) 4287 return None 4288 4289 return self.expression( 4290 exp.NextValueFor, 4291 this=self._parse_column(), 4292 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4293 ) 4294 4295 def _parse_extract(self) -> exp.Extract: 4296 this = self._parse_function() or self._parse_var() or self._parse_type() 4297 4298 if self._match(TokenType.FROM): 4299 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4300 4301 if not self._match(TokenType.COMMA): 4302 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4303 4304 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4305 4306 def _parse_any_value(self) -> exp.AnyValue: 4307 this = self._parse_lambda() 4308 is_max = None 4309 having = None 4310 4311 if self._match(TokenType.HAVING): 4312 self._match_texts(("MAX", "MIN")) 4313 is_max = self._prev.text == "MAX" 4314 having = self._parse_column() 4315 4316 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4317 4318 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4319 this = self._parse_conjunction() 4320 4321 if not self._match(TokenType.ALIAS): 4322 if self._match(TokenType.COMMA): 4323 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4324 4325 self.raise_error("Expected AS after CAST") 4326 4327 fmt = None 4328 to = self._parse_types() 4329 4330 if self._match(TokenType.FORMAT): 4331 fmt_string = self._parse_string() 4332 fmt = self._parse_at_time_zone(fmt_string) 4333 4334 if not to: 4335 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4336 if to.this in exp.DataType.TEMPORAL_TYPES: 4337 this = self.expression( 4338 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4339 this=this, 4340 format=exp.Literal.string( 4341 format_time( 4342 fmt_string.this if fmt_string else "", 4343 self.FORMAT_MAPPING or self.TIME_MAPPING, 4344 self.FORMAT_TRIE or self.TIME_TRIE, 4345 ) 4346 ), 4347 ) 4348 4349 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4350 this.set("zone", fmt.args["zone"]) 4351 return this 4352 elif not to: 4353 self.raise_error("Expected TYPE after CAST") 4354 elif isinstance(to, exp.Identifier): 4355 to = exp.DataType.build(to.name, udt=True) 4356 elif to.this == exp.DataType.Type.CHAR: 4357 if self._match(TokenType.CHARACTER_SET): 4358 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4359 4360 return self.expression( 4361 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4362 ) 4363 4364 def _parse_concat(self) -> t.Optional[exp.Expression]: 4365 args = self._parse_csv(self._parse_conjunction) 4366 if self.CONCAT_NULL_OUTPUTS_STRING: 4367 args = self._ensure_string_if_null(args) 4368 4369 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4370 # we find such a call we replace it with its argument. 4371 if len(args) == 1: 4372 return args[0] 4373 4374 return self.expression(exp.Concat, expressions=args, safe=not self.STRICT_STRING_CONCAT) 4375 4376 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4377 args = self._parse_csv(self._parse_conjunction) 4378 if len(args) < 2: 4379 return self.expression(exp.ConcatWs, expressions=args) 4380 delim, *values = args 4381 if self.CONCAT_NULL_OUTPUTS_STRING: 4382 values = self._ensure_string_if_null(values) 4383 4384 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4385 4386 def _parse_string_agg(self) -> exp.Expression: 4387 if self._match(TokenType.DISTINCT): 4388 args: t.List[t.Optional[exp.Expression]] = [ 4389 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4390 ] 4391 if self._match(TokenType.COMMA): 4392 args.extend(self._parse_csv(self._parse_conjunction)) 4393 else: 4394 args = self._parse_csv(self._parse_conjunction) # type: ignore 4395 4396 index = self._index 4397 if not self._match(TokenType.R_PAREN) and args: 4398 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4399 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4400 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4401 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4402 4403 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4404 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4405 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4406 if not self._match_text_seq("WITHIN", "GROUP"): 4407 self._retreat(index) 4408 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4409 4410 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4411 order = self._parse_order(this=seq_get(args, 0)) 4412 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4413 4414 def _parse_convert( 4415 self, strict: bool, safe: t.Optional[bool] = None 4416 ) -> t.Optional[exp.Expression]: 4417 this = self._parse_bitwise() 4418 4419 if self._match(TokenType.USING): 4420 to: t.Optional[exp.Expression] = self.expression( 4421 exp.CharacterSet, this=self._parse_var() 4422 ) 4423 elif self._match(TokenType.COMMA): 4424 to = self._parse_types() 4425 else: 4426 to = None 4427 4428 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4429 4430 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4431 """ 4432 There are generally two variants of the DECODE function: 4433 4434 - DECODE(bin, charset) 4435 - DECODE(expression, search, result [, search, result] ... [, default]) 4436 4437 The second variant will always be parsed into a CASE expression. Note that NULL 4438 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4439 instead of relying on pattern matching. 4440 """ 4441 args = self._parse_csv(self._parse_conjunction) 4442 4443 if len(args) < 3: 4444 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4445 4446 expression, *expressions = args 4447 if not expression: 4448 return None 4449 4450 ifs = [] 4451 for search, result in zip(expressions[::2], expressions[1::2]): 4452 if not search or not result: 4453 return None 4454 4455 if isinstance(search, exp.Literal): 4456 ifs.append( 4457 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4458 ) 4459 elif isinstance(search, exp.Null): 4460 ifs.append( 4461 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4462 ) 4463 else: 4464 cond = exp.or_( 4465 exp.EQ(this=expression.copy(), expression=search), 4466 exp.and_( 4467 exp.Is(this=expression.copy(), expression=exp.Null()), 4468 exp.Is(this=search.copy(), expression=exp.Null()), 4469 copy=False, 4470 ), 4471 copy=False, 4472 ) 4473 ifs.append(exp.If(this=cond, true=result)) 4474 4475 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4476 4477 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4478 self._match_text_seq("KEY") 4479 key = self._parse_column() 4480 self._match_set((TokenType.COLON, TokenType.COMMA)) 4481 self._match_text_seq("VALUE") 4482 value = self._parse_bitwise() 4483 4484 if not key and not value: 4485 return None 4486 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4487 4488 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4489 if not this or not self._match_text_seq("FORMAT", "JSON"): 4490 return this 4491 4492 return self.expression(exp.FormatJson, this=this) 4493 4494 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4495 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4496 for value in values: 4497 if self._match_text_seq(value, "ON", on): 4498 return f"{value} ON {on}" 4499 4500 return None 4501 4502 def _parse_json_object(self) -> exp.JSONObject: 4503 star = self._parse_star() 4504 expressions = ( 4505 [star] 4506 if star 4507 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4508 ) 4509 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4510 4511 unique_keys = None 4512 if self._match_text_seq("WITH", "UNIQUE"): 4513 unique_keys = True 4514 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4515 unique_keys = False 4516 4517 self._match_text_seq("KEYS") 4518 4519 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4520 self._parse_type() 4521 ) 4522 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4523 4524 return self.expression( 4525 exp.JSONObject, 4526 expressions=expressions, 4527 null_handling=null_handling, 4528 unique_keys=unique_keys, 4529 return_type=return_type, 4530 encoding=encoding, 4531 ) 4532 4533 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4534 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4535 if not self._match_text_seq("NESTED"): 4536 this = self._parse_id_var() 4537 kind = self._parse_types(allow_identifiers=False) 4538 nested = None 4539 else: 4540 this = None 4541 kind = None 4542 nested = True 4543 4544 path = self._match_text_seq("PATH") and self._parse_string() 4545 nested_schema = nested and self._parse_json_schema() 4546 4547 return self.expression( 4548 exp.JSONColumnDef, 4549 this=this, 4550 kind=kind, 4551 path=path, 4552 nested_schema=nested_schema, 4553 ) 4554 4555 def _parse_json_schema(self) -> exp.JSONSchema: 4556 self._match_text_seq("COLUMNS") 4557 return self.expression( 4558 exp.JSONSchema, 4559 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4560 ) 4561 4562 def _parse_json_table(self) -> exp.JSONTable: 4563 this = self._parse_format_json(self._parse_bitwise()) 4564 path = self._match(TokenType.COMMA) and self._parse_string() 4565 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4566 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4567 schema = self._parse_json_schema() 4568 4569 return exp.JSONTable( 4570 this=this, 4571 schema=schema, 4572 path=path, 4573 error_handling=error_handling, 4574 empty_handling=empty_handling, 4575 ) 4576 4577 def _parse_logarithm(self) -> exp.Func: 4578 # Default argument order is base, expression 4579 args = self._parse_csv(self._parse_range) 4580 4581 if len(args) > 1: 4582 if not self.LOG_BASE_FIRST: 4583 args.reverse() 4584 return exp.Log.from_arg_list(args) 4585 4586 return self.expression( 4587 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4588 ) 4589 4590 def _parse_match_against(self) -> exp.MatchAgainst: 4591 expressions = self._parse_csv(self._parse_column) 4592 4593 self._match_text_seq(")", "AGAINST", "(") 4594 4595 this = self._parse_string() 4596 4597 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4598 modifier = "IN NATURAL LANGUAGE MODE" 4599 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4600 modifier = f"{modifier} WITH QUERY EXPANSION" 4601 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4602 modifier = "IN BOOLEAN MODE" 4603 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4604 modifier = "WITH QUERY EXPANSION" 4605 else: 4606 modifier = None 4607 4608 return self.expression( 4609 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4610 ) 4611 4612 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4613 def _parse_open_json(self) -> exp.OpenJSON: 4614 this = self._parse_bitwise() 4615 path = self._match(TokenType.COMMA) and self._parse_string() 4616 4617 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4618 this = self._parse_field(any_token=True) 4619 kind = self._parse_types() 4620 path = self._parse_string() 4621 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4622 4623 return self.expression( 4624 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4625 ) 4626 4627 expressions = None 4628 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4629 self._match_l_paren() 4630 expressions = self._parse_csv(_parse_open_json_column_def) 4631 4632 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4633 4634 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4635 args = self._parse_csv(self._parse_bitwise) 4636 4637 if self._match(TokenType.IN): 4638 return self.expression( 4639 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4640 ) 4641 4642 if haystack_first: 4643 haystack = seq_get(args, 0) 4644 needle = seq_get(args, 1) 4645 else: 4646 needle = seq_get(args, 0) 4647 haystack = seq_get(args, 1) 4648 4649 return self.expression( 4650 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4651 ) 4652 4653 def _parse_predict(self) -> exp.Predict: 4654 self._match_text_seq("MODEL") 4655 this = self._parse_table() 4656 4657 self._match(TokenType.COMMA) 4658 self._match_text_seq("TABLE") 4659 4660 return self.expression( 4661 exp.Predict, 4662 this=this, 4663 expression=self._parse_table(), 4664 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4665 ) 4666 4667 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4668 args = self._parse_csv(self._parse_table) 4669 return exp.JoinHint(this=func_name.upper(), expressions=args) 4670 4671 def _parse_substring(self) -> exp.Substring: 4672 # Postgres supports the form: substring(string [from int] [for int]) 4673 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4674 4675 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4676 4677 if self._match(TokenType.FROM): 4678 args.append(self._parse_bitwise()) 4679 if self._match(TokenType.FOR): 4680 args.append(self._parse_bitwise()) 4681 4682 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4683 4684 def _parse_trim(self) -> exp.Trim: 4685 # https://www.w3resource.com/sql/character-functions/trim.php 4686 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4687 4688 position = None 4689 collation = None 4690 expression = None 4691 4692 if self._match_texts(self.TRIM_TYPES): 4693 position = self._prev.text.upper() 4694 4695 this = self._parse_bitwise() 4696 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4697 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4698 expression = self._parse_bitwise() 4699 4700 if invert_order: 4701 this, expression = expression, this 4702 4703 if self._match(TokenType.COLLATE): 4704 collation = self._parse_bitwise() 4705 4706 return self.expression( 4707 exp.Trim, this=this, position=position, expression=expression, collation=collation 4708 ) 4709 4710 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4711 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4712 4713 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4714 return self._parse_window(self._parse_id_var(), alias=True) 4715 4716 def _parse_respect_or_ignore_nulls( 4717 self, this: t.Optional[exp.Expression] 4718 ) -> t.Optional[exp.Expression]: 4719 if self._match_text_seq("IGNORE", "NULLS"): 4720 return self.expression(exp.IgnoreNulls, this=this) 4721 if self._match_text_seq("RESPECT", "NULLS"): 4722 return self.expression(exp.RespectNulls, this=this) 4723 return this 4724 4725 def _parse_window( 4726 self, this: t.Optional[exp.Expression], alias: bool = False 4727 ) -> t.Optional[exp.Expression]: 4728 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4729 self._match(TokenType.WHERE) 4730 this = self.expression( 4731 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4732 ) 4733 self._match_r_paren() 4734 4735 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4736 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4737 if self._match_text_seq("WITHIN", "GROUP"): 4738 order = self._parse_wrapped(self._parse_order) 4739 this = self.expression(exp.WithinGroup, this=this, expression=order) 4740 4741 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4742 # Some dialects choose to implement and some do not. 4743 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4744 4745 # There is some code above in _parse_lambda that handles 4746 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4747 4748 # The below changes handle 4749 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4750 4751 # Oracle allows both formats 4752 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4753 # and Snowflake chose to do the same for familiarity 4754 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4755 this = self._parse_respect_or_ignore_nulls(this) 4756 4757 # bigquery select from window x AS (partition by ...) 4758 if alias: 4759 over = None 4760 self._match(TokenType.ALIAS) 4761 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4762 return this 4763 else: 4764 over = self._prev.text.upper() 4765 4766 if not self._match(TokenType.L_PAREN): 4767 return self.expression( 4768 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4769 ) 4770 4771 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4772 4773 first = self._match(TokenType.FIRST) 4774 if self._match_text_seq("LAST"): 4775 first = False 4776 4777 partition, order = self._parse_partition_and_order() 4778 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4779 4780 if kind: 4781 self._match(TokenType.BETWEEN) 4782 start = self._parse_window_spec() 4783 self._match(TokenType.AND) 4784 end = self._parse_window_spec() 4785 4786 spec = self.expression( 4787 exp.WindowSpec, 4788 kind=kind, 4789 start=start["value"], 4790 start_side=start["side"], 4791 end=end["value"], 4792 end_side=end["side"], 4793 ) 4794 else: 4795 spec = None 4796 4797 self._match_r_paren() 4798 4799 window = self.expression( 4800 exp.Window, 4801 this=this, 4802 partition_by=partition, 4803 order=order, 4804 spec=spec, 4805 alias=window_alias, 4806 over=over, 4807 first=first, 4808 ) 4809 4810 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4811 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4812 return self._parse_window(window, alias=alias) 4813 4814 return window 4815 4816 def _parse_partition_and_order( 4817 self, 4818 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4819 return self._parse_partition_by(), self._parse_order() 4820 4821 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4822 self._match(TokenType.BETWEEN) 4823 4824 return { 4825 "value": ( 4826 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4827 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4828 or self._parse_bitwise() 4829 ), 4830 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4831 } 4832 4833 def _parse_alias( 4834 self, this: t.Optional[exp.Expression], explicit: bool = False 4835 ) -> t.Optional[exp.Expression]: 4836 any_token = self._match(TokenType.ALIAS) 4837 comments = self._prev_comments 4838 4839 if explicit and not any_token: 4840 return this 4841 4842 if self._match(TokenType.L_PAREN): 4843 aliases = self.expression( 4844 exp.Aliases, 4845 comments=comments, 4846 this=this, 4847 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4848 ) 4849 self._match_r_paren(aliases) 4850 return aliases 4851 4852 alias = self._parse_id_var(any_token) 4853 4854 if alias: 4855 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 4856 4857 return this 4858 4859 def _parse_id_var( 4860 self, 4861 any_token: bool = True, 4862 tokens: t.Optional[t.Collection[TokenType]] = None, 4863 ) -> t.Optional[exp.Expression]: 4864 identifier = self._parse_identifier() 4865 4866 if identifier: 4867 return identifier 4868 4869 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4870 quoted = self._prev.token_type == TokenType.STRING 4871 return exp.Identifier(this=self._prev.text, quoted=quoted) 4872 4873 return None 4874 4875 def _parse_string(self) -> t.Optional[exp.Expression]: 4876 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 4877 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 4878 return self._parse_placeholder() 4879 4880 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4881 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4882 4883 def _parse_number(self) -> t.Optional[exp.Expression]: 4884 if self._match(TokenType.NUMBER): 4885 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4886 return self._parse_placeholder() 4887 4888 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4889 if self._match(TokenType.IDENTIFIER): 4890 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4891 return self._parse_placeholder() 4892 4893 def _parse_var( 4894 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4895 ) -> t.Optional[exp.Expression]: 4896 if ( 4897 (any_token and self._advance_any()) 4898 or self._match(TokenType.VAR) 4899 or (self._match_set(tokens) if tokens else False) 4900 ): 4901 return self.expression(exp.Var, this=self._prev.text) 4902 return self._parse_placeholder() 4903 4904 def _advance_any(self) -> t.Optional[Token]: 4905 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4906 self._advance() 4907 return self._prev 4908 return None 4909 4910 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4911 return self._parse_var() or self._parse_string() 4912 4913 def _parse_null(self) -> t.Optional[exp.Expression]: 4914 if self._match_set(self.NULL_TOKENS): 4915 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4916 return self._parse_placeholder() 4917 4918 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4919 if self._match(TokenType.TRUE): 4920 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4921 if self._match(TokenType.FALSE): 4922 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4923 return self._parse_placeholder() 4924 4925 def _parse_star(self) -> t.Optional[exp.Expression]: 4926 if self._match(TokenType.STAR): 4927 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4928 return self._parse_placeholder() 4929 4930 def _parse_parameter(self) -> exp.Parameter: 4931 def _parse_parameter_part() -> t.Optional[exp.Expression]: 4932 return ( 4933 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 4934 ) 4935 4936 self._match(TokenType.L_BRACE) 4937 this = _parse_parameter_part() 4938 expression = self._match(TokenType.COLON) and _parse_parameter_part() 4939 self._match(TokenType.R_BRACE) 4940 4941 return self.expression(exp.Parameter, this=this, expression=expression) 4942 4943 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4944 if self._match_set(self.PLACEHOLDER_PARSERS): 4945 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4946 if placeholder: 4947 return placeholder 4948 self._advance(-1) 4949 return None 4950 4951 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4952 if not self._match(TokenType.EXCEPT): 4953 return None 4954 if self._match(TokenType.L_PAREN, advance=False): 4955 return self._parse_wrapped_csv(self._parse_column) 4956 4957 except_column = self._parse_column() 4958 return [except_column] if except_column else None 4959 4960 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4961 if not self._match(TokenType.REPLACE): 4962 return None 4963 if self._match(TokenType.L_PAREN, advance=False): 4964 return self._parse_wrapped_csv(self._parse_expression) 4965 4966 replace_expression = self._parse_expression() 4967 return [replace_expression] if replace_expression else None 4968 4969 def _parse_csv( 4970 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4971 ) -> t.List[exp.Expression]: 4972 parse_result = parse_method() 4973 items = [parse_result] if parse_result is not None else [] 4974 4975 while self._match(sep): 4976 self._add_comments(parse_result) 4977 parse_result = parse_method() 4978 if parse_result is not None: 4979 items.append(parse_result) 4980 4981 return items 4982 4983 def _parse_tokens( 4984 self, parse_method: t.Callable, expressions: t.Dict 4985 ) -> t.Optional[exp.Expression]: 4986 this = parse_method() 4987 4988 while self._match_set(expressions): 4989 this = self.expression( 4990 expressions[self._prev.token_type], 4991 this=this, 4992 comments=self._prev_comments, 4993 expression=parse_method(), 4994 ) 4995 4996 return this 4997 4998 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4999 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5000 5001 def _parse_wrapped_csv( 5002 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5003 ) -> t.List[exp.Expression]: 5004 return self._parse_wrapped( 5005 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5006 ) 5007 5008 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5009 wrapped = self._match(TokenType.L_PAREN) 5010 if not wrapped and not optional: 5011 self.raise_error("Expecting (") 5012 parse_result = parse_method() 5013 if wrapped: 5014 self._match_r_paren() 5015 return parse_result 5016 5017 def _parse_expressions(self) -> t.List[exp.Expression]: 5018 return self._parse_csv(self._parse_expression) 5019 5020 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5021 return self._parse_select() or self._parse_set_operations( 5022 self._parse_expression() if alias else self._parse_conjunction() 5023 ) 5024 5025 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5026 return self._parse_query_modifiers( 5027 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5028 ) 5029 5030 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5031 this = None 5032 if self._match_texts(self.TRANSACTION_KIND): 5033 this = self._prev.text 5034 5035 self._match_texts(("TRANSACTION", "WORK")) 5036 5037 modes = [] 5038 while True: 5039 mode = [] 5040 while self._match(TokenType.VAR): 5041 mode.append(self._prev.text) 5042 5043 if mode: 5044 modes.append(" ".join(mode)) 5045 if not self._match(TokenType.COMMA): 5046 break 5047 5048 return self.expression(exp.Transaction, this=this, modes=modes) 5049 5050 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5051 chain = None 5052 savepoint = None 5053 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5054 5055 self._match_texts(("TRANSACTION", "WORK")) 5056 5057 if self._match_text_seq("TO"): 5058 self._match_text_seq("SAVEPOINT") 5059 savepoint = self._parse_id_var() 5060 5061 if self._match(TokenType.AND): 5062 chain = not self._match_text_seq("NO") 5063 self._match_text_seq("CHAIN") 5064 5065 if is_rollback: 5066 return self.expression(exp.Rollback, savepoint=savepoint) 5067 5068 return self.expression(exp.Commit, chain=chain) 5069 5070 def _parse_refresh(self) -> exp.Refresh: 5071 self._match(TokenType.TABLE) 5072 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5073 5074 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5075 if not self._match_text_seq("ADD"): 5076 return None 5077 5078 self._match(TokenType.COLUMN) 5079 exists_column = self._parse_exists(not_=True) 5080 expression = self._parse_field_def() 5081 5082 if expression: 5083 expression.set("exists", exists_column) 5084 5085 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5086 if self._match_texts(("FIRST", "AFTER")): 5087 position = self._prev.text 5088 column_position = self.expression( 5089 exp.ColumnPosition, this=self._parse_column(), position=position 5090 ) 5091 expression.set("position", column_position) 5092 5093 return expression 5094 5095 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5096 drop = self._match(TokenType.DROP) and self._parse_drop() 5097 if drop and not isinstance(drop, exp.Command): 5098 drop.set("kind", drop.args.get("kind", "COLUMN")) 5099 return drop 5100 5101 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5102 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5103 return self.expression( 5104 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5105 ) 5106 5107 def _parse_add_constraint(self) -> exp.AddConstraint: 5108 this = None 5109 kind = self._prev.token_type 5110 5111 if kind == TokenType.CONSTRAINT: 5112 this = self._parse_id_var() 5113 5114 if self._match_text_seq("CHECK"): 5115 expression = self._parse_wrapped(self._parse_conjunction) 5116 enforced = self._match_text_seq("ENFORCED") 5117 5118 return self.expression( 5119 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5120 ) 5121 5122 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5123 expression = self._parse_foreign_key() 5124 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5125 expression = self._parse_primary_key() 5126 else: 5127 expression = None 5128 5129 return self.expression(exp.AddConstraint, this=this, expression=expression) 5130 5131 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5132 index = self._index - 1 5133 5134 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5135 return self._parse_csv(self._parse_add_constraint) 5136 5137 self._retreat(index) 5138 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 5139 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5140 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5141 5142 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5143 self._match(TokenType.COLUMN) 5144 column = self._parse_field(any_token=True) 5145 5146 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5147 return self.expression(exp.AlterColumn, this=column, drop=True) 5148 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5149 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5150 5151 self._match_text_seq("SET", "DATA") 5152 return self.expression( 5153 exp.AlterColumn, 5154 this=column, 5155 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5156 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5157 using=self._match(TokenType.USING) and self._parse_conjunction(), 5158 ) 5159 5160 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5161 index = self._index - 1 5162 5163 partition_exists = self._parse_exists() 5164 if self._match(TokenType.PARTITION, advance=False): 5165 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5166 5167 self._retreat(index) 5168 return self._parse_csv(self._parse_drop_column) 5169 5170 def _parse_alter_table_rename(self) -> exp.RenameTable: 5171 self._match_text_seq("TO") 5172 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5173 5174 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5175 start = self._prev 5176 5177 if not self._match(TokenType.TABLE): 5178 return self._parse_as_command(start) 5179 5180 exists = self._parse_exists() 5181 only = self._match_text_seq("ONLY") 5182 this = self._parse_table(schema=True) 5183 5184 if self._next: 5185 self._advance() 5186 5187 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5188 if parser: 5189 actions = ensure_list(parser(self)) 5190 5191 if not self._curr: 5192 return self.expression( 5193 exp.AlterTable, 5194 this=this, 5195 exists=exists, 5196 actions=actions, 5197 only=only, 5198 ) 5199 5200 return self._parse_as_command(start) 5201 5202 def _parse_merge(self) -> exp.Merge: 5203 self._match(TokenType.INTO) 5204 target = self._parse_table() 5205 5206 if target and self._match(TokenType.ALIAS, advance=False): 5207 target.set("alias", self._parse_table_alias()) 5208 5209 self._match(TokenType.USING) 5210 using = self._parse_table() 5211 5212 self._match(TokenType.ON) 5213 on = self._parse_conjunction() 5214 5215 return self.expression( 5216 exp.Merge, 5217 this=target, 5218 using=using, 5219 on=on, 5220 expressions=self._parse_when_matched(), 5221 ) 5222 5223 def _parse_when_matched(self) -> t.List[exp.When]: 5224 whens = [] 5225 5226 while self._match(TokenType.WHEN): 5227 matched = not self._match(TokenType.NOT) 5228 self._match_text_seq("MATCHED") 5229 source = ( 5230 False 5231 if self._match_text_seq("BY", "TARGET") 5232 else self._match_text_seq("BY", "SOURCE") 5233 ) 5234 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5235 5236 self._match(TokenType.THEN) 5237 5238 if self._match(TokenType.INSERT): 5239 _this = self._parse_star() 5240 if _this: 5241 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5242 else: 5243 then = self.expression( 5244 exp.Insert, 5245 this=self._parse_value(), 5246 expression=self._match(TokenType.VALUES) and self._parse_value(), 5247 ) 5248 elif self._match(TokenType.UPDATE): 5249 expressions = self._parse_star() 5250 if expressions: 5251 then = self.expression(exp.Update, expressions=expressions) 5252 else: 5253 then = self.expression( 5254 exp.Update, 5255 expressions=self._match(TokenType.SET) 5256 and self._parse_csv(self._parse_equality), 5257 ) 5258 elif self._match(TokenType.DELETE): 5259 then = self.expression(exp.Var, this=self._prev.text) 5260 else: 5261 then = None 5262 5263 whens.append( 5264 self.expression( 5265 exp.When, 5266 matched=matched, 5267 source=source, 5268 condition=condition, 5269 then=then, 5270 ) 5271 ) 5272 return whens 5273 5274 def _parse_show(self) -> t.Optional[exp.Expression]: 5275 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5276 if parser: 5277 return parser(self) 5278 return self._parse_as_command(self._prev) 5279 5280 def _parse_set_item_assignment( 5281 self, kind: t.Optional[str] = None 5282 ) -> t.Optional[exp.Expression]: 5283 index = self._index 5284 5285 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5286 return self._parse_set_transaction(global_=kind == "GLOBAL") 5287 5288 left = self._parse_primary() or self._parse_id_var() 5289 assignment_delimiter = self._match_texts(("=", "TO")) 5290 5291 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5292 self._retreat(index) 5293 return None 5294 5295 right = self._parse_statement() or self._parse_id_var() 5296 this = self.expression(exp.EQ, this=left, expression=right) 5297 5298 return self.expression(exp.SetItem, this=this, kind=kind) 5299 5300 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5301 self._match_text_seq("TRANSACTION") 5302 characteristics = self._parse_csv( 5303 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5304 ) 5305 return self.expression( 5306 exp.SetItem, 5307 expressions=characteristics, 5308 kind="TRANSACTION", 5309 **{"global": global_}, # type: ignore 5310 ) 5311 5312 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5313 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5314 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5315 5316 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5317 index = self._index 5318 set_ = self.expression( 5319 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5320 ) 5321 5322 if self._curr: 5323 self._retreat(index) 5324 return self._parse_as_command(self._prev) 5325 5326 return set_ 5327 5328 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5329 for option in options: 5330 if self._match_text_seq(*option.split(" ")): 5331 return exp.var(option) 5332 return None 5333 5334 def _parse_as_command(self, start: Token) -> exp.Command: 5335 while self._curr: 5336 self._advance() 5337 text = self._find_sql(start, self._prev) 5338 size = len(start.text) 5339 return exp.Command(this=text[:size], expression=text[size:]) 5340 5341 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5342 settings = [] 5343 5344 self._match_l_paren() 5345 kind = self._parse_id_var() 5346 5347 if self._match(TokenType.L_PAREN): 5348 while True: 5349 key = self._parse_id_var() 5350 value = self._parse_primary() 5351 5352 if not key and value is None: 5353 break 5354 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5355 self._match(TokenType.R_PAREN) 5356 5357 self._match_r_paren() 5358 5359 return self.expression( 5360 exp.DictProperty, 5361 this=this, 5362 kind=kind.this if kind else None, 5363 settings=settings, 5364 ) 5365 5366 def _parse_dict_range(self, this: str) -> exp.DictRange: 5367 self._match_l_paren() 5368 has_min = self._match_text_seq("MIN") 5369 if has_min: 5370 min = self._parse_var() or self._parse_primary() 5371 self._match_text_seq("MAX") 5372 max = self._parse_var() or self._parse_primary() 5373 else: 5374 max = self._parse_var() or self._parse_primary() 5375 min = exp.Literal.number(0) 5376 self._match_r_paren() 5377 return self.expression(exp.DictRange, this=this, min=min, max=max) 5378 5379 def _parse_comprehension( 5380 self, this: t.Optional[exp.Expression] 5381 ) -> t.Optional[exp.Comprehension]: 5382 index = self._index 5383 expression = self._parse_column() 5384 if not self._match(TokenType.IN): 5385 self._retreat(index - 1) 5386 return None 5387 iterator = self._parse_column() 5388 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5389 return self.expression( 5390 exp.Comprehension, 5391 this=this, 5392 expression=expression, 5393 iterator=iterator, 5394 condition=condition, 5395 ) 5396 5397 def _find_parser( 5398 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5399 ) -> t.Optional[t.Callable]: 5400 if not self._curr: 5401 return None 5402 5403 index = self._index 5404 this = [] 5405 while True: 5406 # The current token might be multiple words 5407 curr = self._curr.text.upper() 5408 key = curr.split(" ") 5409 this.append(curr) 5410 5411 self._advance() 5412 result, trie = in_trie(trie, key) 5413 if result == TrieResult.FAILED: 5414 break 5415 5416 if result == TrieResult.EXISTS: 5417 subparser = parsers[" ".join(this)] 5418 return subparser 5419 5420 self._retreat(index) 5421 return None 5422 5423 def _match(self, token_type, advance=True, expression=None): 5424 if not self._curr: 5425 return None 5426 5427 if self._curr.token_type == token_type: 5428 if advance: 5429 self._advance() 5430 self._add_comments(expression) 5431 return True 5432 5433 return None 5434 5435 def _match_set(self, types, advance=True): 5436 if not self._curr: 5437 return None 5438 5439 if self._curr.token_type in types: 5440 if advance: 5441 self._advance() 5442 return True 5443 5444 return None 5445 5446 def _match_pair(self, token_type_a, token_type_b, advance=True): 5447 if not self._curr or not self._next: 5448 return None 5449 5450 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5451 if advance: 5452 self._advance(2) 5453 return True 5454 5455 return None 5456 5457 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5458 if not self._match(TokenType.L_PAREN, expression=expression): 5459 self.raise_error("Expecting (") 5460 5461 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5462 if not self._match(TokenType.R_PAREN, expression=expression): 5463 self.raise_error("Expecting )") 5464 5465 def _match_texts(self, texts, advance=True): 5466 if self._curr and self._curr.text.upper() in texts: 5467 if advance: 5468 self._advance() 5469 return True 5470 return False 5471 5472 def _match_text_seq(self, *texts, advance=True): 5473 index = self._index 5474 for text in texts: 5475 if self._curr and self._curr.text.upper() == text: 5476 self._advance() 5477 else: 5478 self._retreat(index) 5479 return False 5480 5481 if not advance: 5482 self._retreat(index) 5483 5484 return True 5485 5486 @t.overload 5487 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5488 ... 5489 5490 @t.overload 5491 def _replace_columns_with_dots( 5492 self, this: t.Optional[exp.Expression] 5493 ) -> t.Optional[exp.Expression]: 5494 ... 5495 5496 def _replace_columns_with_dots(self, this): 5497 if isinstance(this, exp.Dot): 5498 exp.replace_children(this, self._replace_columns_with_dots) 5499 elif isinstance(this, exp.Column): 5500 exp.replace_children(this, self._replace_columns_with_dots) 5501 table = this.args.get("table") 5502 this = ( 5503 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5504 ) 5505 5506 return this 5507 5508 def _replace_lambda( 5509 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5510 ) -> t.Optional[exp.Expression]: 5511 if not node: 5512 return node 5513 5514 for column in node.find_all(exp.Column): 5515 if column.parts[0].name in lambda_variables: 5516 dot_or_id = column.to_dot() if column.table else column.this 5517 parent = column.parent 5518 5519 while isinstance(parent, exp.Dot): 5520 if not isinstance(parent.parent, exp.Dot): 5521 parent.replace(dot_or_id) 5522 break 5523 parent = parent.parent 5524 else: 5525 if column is node: 5526 node = dot_or_id 5527 else: 5528 column.replace(dot_or_id) 5529 return node 5530 5531 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5532 return [ 5533 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5534 for value in values 5535 if value 5536 ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
974 def __init__( 975 self, 976 error_level: t.Optional[ErrorLevel] = None, 977 error_message_context: int = 100, 978 max_errors: int = 3, 979 ): 980 self.error_level = error_level or ErrorLevel.IMMEDIATE 981 self.error_message_context = error_message_context 982 self.max_errors = max_errors 983 self._tokenizer = self.TOKENIZER_CLASS() 984 self.reset()
996 def parse( 997 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 998 ) -> t.List[t.Optional[exp.Expression]]: 999 """ 1000 Parses a list of tokens and returns a list of syntax trees, one tree 1001 per parsed SQL statement. 1002 1003 Args: 1004 raw_tokens: The list of tokens. 1005 sql: The original SQL string, used to produce helpful debug messages. 1006 1007 Returns: 1008 The list of the produced syntax trees. 1009 """ 1010 return self._parse( 1011 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1012 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1014 def parse_into( 1015 self, 1016 expression_types: exp.IntoType, 1017 raw_tokens: t.List[Token], 1018 sql: t.Optional[str] = None, 1019 ) -> t.List[t.Optional[exp.Expression]]: 1020 """ 1021 Parses a list of tokens into a given Expression type. If a collection of Expression 1022 types is given instead, this method will try to parse the token list into each one 1023 of them, stopping at the first for which the parsing succeeds. 1024 1025 Args: 1026 expression_types: The expression type(s) to try and parse the token list into. 1027 raw_tokens: The list of tokens. 1028 sql: The original SQL string, used to produce helpful debug messages. 1029 1030 Returns: 1031 The target Expression. 1032 """ 1033 errors = [] 1034 for expression_type in ensure_list(expression_types): 1035 parser = self.EXPRESSION_PARSERS.get(expression_type) 1036 if not parser: 1037 raise TypeError(f"No parser registered for {expression_type}") 1038 1039 try: 1040 return self._parse(parser, raw_tokens, sql) 1041 except ParseError as e: 1042 e.errors[0]["into_expression"] = expression_type 1043 errors.append(e) 1044 1045 raise ParseError( 1046 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1047 errors=merge_errors(errors), 1048 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1085 def check_errors(self) -> None: 1086 """Logs or raises any found errors, depending on the chosen error level setting.""" 1087 if self.error_level == ErrorLevel.WARN: 1088 for error in self.errors: 1089 logger.error(str(error)) 1090 elif self.error_level == ErrorLevel.RAISE and self.errors: 1091 raise ParseError( 1092 concat_messages(self.errors, self.max_errors), 1093 errors=merge_errors(self.errors), 1094 )
Logs or raises any found errors, depending on the chosen error level setting.
1096 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1097 """ 1098 Appends an error in the list of recorded errors or raises it, depending on the chosen 1099 error level setting. 1100 """ 1101 token = token or self._curr or self._prev or Token.string("") 1102 start = token.start 1103 end = token.end + 1 1104 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1105 highlight = self.sql[start:end] 1106 end_context = self.sql[end : end + self.error_message_context] 1107 1108 error = ParseError.new( 1109 f"{message}. Line {token.line}, Col: {token.col}.\n" 1110 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1111 description=message, 1112 line=token.line, 1113 col=token.col, 1114 start_context=start_context, 1115 highlight=highlight, 1116 end_context=end_context, 1117 ) 1118 1119 if self.error_level == ErrorLevel.IMMEDIATE: 1120 raise error 1121 1122 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1124 def expression( 1125 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1126 ) -> E: 1127 """ 1128 Creates a new, validated Expression. 1129 1130 Args: 1131 exp_class: The expression class to instantiate. 1132 comments: An optional list of comments to attach to the expression. 1133 kwargs: The arguments to set for the expression along with their respective values. 1134 1135 Returns: 1136 The target expression. 1137 """ 1138 instance = exp_class(**kwargs) 1139 instance.add_comments(comments) if comments else self._add_comments(instance) 1140 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1147 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1148 """ 1149 Validates an Expression, making sure that all its mandatory arguments are set. 1150 1151 Args: 1152 expression: The expression to validate. 1153 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1154 1155 Returns: 1156 The validated expression. 1157 """ 1158 if self.error_level != ErrorLevel.IGNORE: 1159 for error_message in expression.error_messages(args): 1160 self.raise_error(error_message) 1161 1162 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.