sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
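

# Illustrative sketch (not part of the original module): what the helpers above
# build. parse_like swaps the argument order so LIKE(pattern, value) becomes
# value LIKE pattern, and parse_var_map pairs alternating key/value arguments:
#
#     parse_like([exp.Literal.string("a%"), exp.column("x")])
#     # -> exp.Like(this=Column(x), expression=Literal('a%'))
#
#     parse_var_map([exp.Literal.string("k"), exp.Literal.number(1)])
#     # -> exp.VarMap(keys=Array(['k']), values=Array([1]))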


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }
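
    # Illustrative sketch (assumption, not in the original source): dialect
    # parsers typically extend the FUNCTIONS table in a subclass, e.g.
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "MY_FUNC": lambda args: exp.Anonymous(this="MY_FUNC", expressions=args),
    #         }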

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
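
    # Illustrative sketch (not part of the original module): COLUMN_OPERATORS
    # drives postfix parsing on a column, e.g. x::INT becomes exp.Cast (or
    # exp.TryCast when STRICT_CAST is False), and col -> '$.a' becomes
    # exp.JSONExtract(this=col, expression=<the parsed path literal>).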

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }
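
    # Illustrative sketch (not part of the original module): _parse_statement
    # (defined further below) consumes one token via _match_set(STATEMENT_PARSERS)
    # and dispatches on self._prev.token_type, so TokenType.CREATE routes to
    # _parse_create, TokenType.INSERT to _parse_insert, and so on.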

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
"FREESPACE": lambda self: self._parse_freespace(), 664 "HEAP": lambda self: self.expression(exp.HeapProperty), 665 "IMMUTABLE": lambda self: self.expression( 666 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 667 ), 668 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 669 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 670 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 671 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 672 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 673 "LIKE": lambda self: self._parse_create_like(), 674 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 675 "LOCK": lambda self: self._parse_locking(), 676 "LOCKING": lambda self: self._parse_locking(), 677 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 678 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 679 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 680 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 681 "NO": lambda self: self._parse_no_property(), 682 "ON": lambda self: self._parse_on_property(), 683 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 684 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 685 "PARTITION": lambda self: self._parse_partitioned_of(), 686 "PARTITION BY": lambda self: self._parse_partitioned_by(), 687 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 688 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 689 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 690 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 691 "REMOTE": lambda self: self._parse_remote_with_connection(), 692 "RETURNS": lambda self: self._parse_returns(), 693 "ROW": lambda self: self._parse_row(), 694 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 695 "SAMPLE": lambda self: self.expression( 696 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 697 ), 698 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 699 "SETTINGS": lambda self: self.expression( 700 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 701 ), 702 "SORTKEY": lambda self: self._parse_sortkey(), 703 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 704 "STABLE": lambda self: self.expression( 705 exp.StabilityProperty, this=exp.Literal.string("STABLE") 706 ), 707 "STORED": lambda self: self._parse_stored(), 708 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 709 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 710 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 711 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 712 "TO": lambda self: self._parse_to_table(), 713 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 714 "TRANSFORM": lambda self: self.expression( 715 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 716 ), 717 "TTL": lambda self: self._parse_ttl(), 718 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 719 "VOLATILE": lambda self: self._parse_volatile_property(), 720 "WITH": lambda self: self._parse_with_property(), 721 } 722 723 CONSTRAINT_PARSERS = { 724 "AUTOINCREMENT": lambda self: 
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
"EXCLUSIVE"} 875 TRANSACTION_CHARACTERISTICS = { 876 "ISOLATION LEVEL REPEATABLE READ", 877 "ISOLATION LEVEL READ COMMITTED", 878 "ISOLATION LEVEL READ UNCOMMITTED", 879 "ISOLATION LEVEL SERIALIZABLE", 880 "READ WRITE", 881 "READ ONLY", 882 } 883 884 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 885 886 CLONE_KEYWORDS = {"CLONE", "COPY"} 887 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 888 889 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 890 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 891 892 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 893 894 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 895 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 896 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 897 898 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 899 900 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 901 902 DISTINCT_TOKENS = {TokenType.DISTINCT} 903 904 NULL_TOKENS = {TokenType.NULL} 905 906 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 907 908 STRICT_CAST = True 909 910 # A NULL arg in CONCAT yields NULL by default 911 CONCAT_NULL_OUTPUTS_STRING = False 912 913 PREFIXED_PIVOT_COLUMNS = False 914 IDENTIFY_PIVOT_STRINGS = False 915 916 LOG_BASE_FIRST = True 917 LOG_DEFAULTS_TO_LN = False 918 919 # Whether or not ADD is present for each column added by ALTER TABLE 920 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 921 922 # Whether or not the table sample clause expects CSV syntax 923 TABLESAMPLE_CSV = False 924 925 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 926 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 927 928 # Whether the TRIM function expects the characters to trim as its first argument 929 TRIM_PATTERN_FIRST = False 930 931 # Whether the behavior of a / b depends on the types of a and b. 932 # False means a / b is always float division. 933 # True means a / b is integer division if both a and b are integers. 934 TYPED_DIVISION = False 935 936 # False means 1 / 0 throws an error. 937 # True means 1 / 0 returns null. 

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
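
    # Illustrative usage sketch (not part of the original module): one tree is
    # produced per semicolon-separated statement, so something like
    #
    #     >>> parser = Parser(error_level=ErrorLevel.RAISE)
    #     >>> sql = "SELECT 1; SELECT 2"
    #     >>> trees = parser.parse(parser._tokenizer.tokenize(sql), sql=sql)
    #     >>> len(trees)
    #     2
    #
    # would be expected to hold, given the chunking logic in _parse below.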
1029 """ 1030 errors = [] 1031 for expression_type in ensure_list(expression_types): 1032 parser = self.EXPRESSION_PARSERS.get(expression_type) 1033 if not parser: 1034 raise TypeError(f"No parser registered for {expression_type}") 1035 1036 try: 1037 return self._parse(parser, raw_tokens, sql) 1038 except ParseError as e: 1039 e.errors[0]["into_expression"] = expression_type 1040 errors.append(e) 1041 1042 raise ParseError( 1043 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1044 errors=merge_errors(errors), 1045 ) from errors[-1] 1046 1047 def _parse( 1048 self, 1049 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1050 raw_tokens: t.List[Token], 1051 sql: t.Optional[str] = None, 1052 ) -> t.List[t.Optional[exp.Expression]]: 1053 self.reset() 1054 self.sql = sql or "" 1055 1056 total = len(raw_tokens) 1057 chunks: t.List[t.List[Token]] = [[]] 1058 1059 for i, token in enumerate(raw_tokens): 1060 if token.token_type == TokenType.SEMICOLON: 1061 if i < total - 1: 1062 chunks.append([]) 1063 else: 1064 chunks[-1].append(token) 1065 1066 expressions = [] 1067 1068 for tokens in chunks: 1069 self._index = -1 1070 self._tokens = tokens 1071 self._advance() 1072 1073 expressions.append(parse_method(self)) 1074 1075 if self._index < len(self._tokens): 1076 self.raise_error("Invalid expression / Unexpected token") 1077 1078 self.check_errors() 1079 1080 return expressions 1081 1082 def check_errors(self) -> None: 1083 """Logs or raises any found errors, depending on the chosen error level setting.""" 1084 if self.error_level == ErrorLevel.WARN: 1085 for error in self.errors: 1086 logger.error(str(error)) 1087 elif self.error_level == ErrorLevel.RAISE and self.errors: 1088 raise ParseError( 1089 concat_messages(self.errors, self.max_errors), 1090 errors=merge_errors(self.errors), 1091 ) 1092 1093 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1094 """ 1095 Appends an error in the list of recorded errors or raises it, depending on the chosen 1096 error level setting. 1097 """ 1098 token = token or self._curr or self._prev or Token.string("") 1099 start = token.start 1100 end = token.end + 1 1101 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1102 highlight = self.sql[start:end] 1103 end_context = self.sql[end : end + self.error_message_context] 1104 1105 error = ParseError.new( 1106 f"{message}. Line {token.line}, Col: {token.col}.\n" 1107 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1108 description=message, 1109 line=token.line, 1110 col=token.col, 1111 start_context=start_context, 1112 highlight=highlight, 1113 end_context=end_context, 1114 ) 1115 1116 if self.error_level == ErrorLevel.IMMEDIATE: 1117 raise error 1118 1119 self.errors.append(error) 1120 1121 def expression( 1122 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1123 ) -> E: 1124 """ 1125 Creates a new, validated Expression. 1126 1127 Args: 1128 exp_class: The expression class to instantiate. 1129 comments: An optional list of comments to attach to the expression. 1130 kwargs: The arguments to set for the expression along with their respective values. 1131 1132 Returns: 1133 The target expression. 
1134 """ 1135 instance = exp_class(**kwargs) 1136 instance.add_comments(comments) if comments else self._add_comments(instance) 1137 return self.validate_expression(instance) 1138 1139 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1140 if expression and self._prev_comments: 1141 expression.add_comments(self._prev_comments) 1142 self._prev_comments = None 1143 1144 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1145 """ 1146 Validates an Expression, making sure that all its mandatory arguments are set. 1147 1148 Args: 1149 expression: The expression to validate. 1150 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1151 1152 Returns: 1153 The validated expression. 1154 """ 1155 if self.error_level != ErrorLevel.IGNORE: 1156 for error_message in expression.error_messages(args): 1157 self.raise_error(error_message) 1158 1159 return expression 1160 1161 def _find_sql(self, start: Token, end: Token) -> str: 1162 return self.sql[start.start : end.end + 1] 1163 1164 def _advance(self, times: int = 1) -> None: 1165 self._index += times 1166 self._curr = seq_get(self._tokens, self._index) 1167 self._next = seq_get(self._tokens, self._index + 1) 1168 1169 if self._index > 0: 1170 self._prev = self._tokens[self._index - 1] 1171 self._prev_comments = self._prev.comments 1172 else: 1173 self._prev = None 1174 self._prev_comments = None 1175 1176 def _retreat(self, index: int) -> None: 1177 if index != self._index: 1178 self._advance(index - self._index) 1179 1180 def _parse_command(self) -> exp.Command: 1181 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1182 1183 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1184 start = self._prev 1185 exists = self._parse_exists() if allow_exists else None 1186 1187 self._match(TokenType.ON) 1188 1189 kind = self._match_set(self.CREATABLES) and self._prev 1190 if not kind: 1191 return self._parse_as_command(start) 1192 1193 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1194 this = self._parse_user_defined_function(kind=kind.token_type) 1195 elif kind.token_type == TokenType.TABLE: 1196 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1197 elif kind.token_type == TokenType.COLUMN: 1198 this = self._parse_column() 1199 else: 1200 this = self._parse_id_var() 1201 1202 self._match(TokenType.IS) 1203 1204 return self.expression( 1205 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1206 ) 1207 1208 def _parse_to_table( 1209 self, 1210 ) -> exp.ToTableProperty: 1211 table = self._parse_table_parts(schema=True) 1212 return self.expression(exp.ToTableProperty, this=table) 1213 1214 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1215 def _parse_ttl(self) -> exp.Expression: 1216 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1217 this = self._parse_bitwise() 1218 1219 if self._match_text_seq("DELETE"): 1220 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1221 if self._match_text_seq("RECOMPRESS"): 1222 return self.expression( 1223 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1224 ) 1225 if self._match_text_seq("TO", "DISK"): 1226 return self.expression( 1227 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1228 ) 1229 if self._match_text_seq("TO", "VOLUME"): 1230 return self.expression( 1231 

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
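
    # Illustrative sketch (not part of the original module): by this point a
    # statement like CREATE TABLE t (x INT) has been folded into a single
    # exp.Create node whose "kind" is "TABLE" and whose "this" is an exp.Schema
    # wrapping the table name and column definitions.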
1451 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1452 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1453 "after": self._match_text_seq("AFTER"), 1454 "minimum": self._match_texts(("MIN", "MINIMUM")), 1455 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1456 } 1457 1458 if self._match_texts(self.PROPERTY_PARSERS): 1459 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1460 try: 1461 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1462 except TypeError: 1463 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1464 1465 return None 1466 1467 def _parse_property(self) -> t.Optional[exp.Expression]: 1468 if self._match_texts(self.PROPERTY_PARSERS): 1469 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1470 1471 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1472 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1473 1474 if self._match_text_seq("COMPOUND", "SORTKEY"): 1475 return self._parse_sortkey(compound=True) 1476 1477 if self._match_text_seq("SQL", "SECURITY"): 1478 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1479 1480 index = self._index 1481 key = self._parse_column() 1482 1483 if not self._match(TokenType.EQ): 1484 self._retreat(index) 1485 return None 1486 1487 return self.expression( 1488 exp.Property, 1489 this=key.to_dot() if isinstance(key, exp.Column) else key, 1490 value=self._parse_column() or self._parse_var(any_token=True), 1491 ) 1492 1493 def _parse_stored(self) -> exp.FileFormatProperty: 1494 self._match(TokenType.ALIAS) 1495 1496 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1497 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1498 1499 return self.expression( 1500 exp.FileFormatProperty, 1501 this=self.expression( 1502 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1503 ) 1504 if input_format or output_format 1505 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1506 ) 1507 1508 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1509 self._match(TokenType.EQ) 1510 self._match(TokenType.ALIAS) 1511 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1512 1513 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1514 properties = [] 1515 while True: 1516 if before: 1517 prop = self._parse_property_before() 1518 else: 1519 prop = self._parse_property() 1520 1521 if not prop: 1522 break 1523 for p in ensure_list(prop): 1524 properties.append(p) 1525 1526 if properties: 1527 return self.expression(exp.Properties, expressions=properties) 1528 1529 return None 1530 1531 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1532 return self.expression( 1533 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1534 ) 1535 1536 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1537 if self._index >= 2: 1538 pre_volatile_token = self._tokens[self._index - 2] 1539 else: 1540 pre_volatile_token = None 1541 1542 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1543 return exp.VolatileProperty() 1544 1545 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1546 1547 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1548 

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)
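
    # Illustrative note (not part of the original module): _parse_definer above
    # turns MySQL's DEFINER = user@host clause into
    # exp.DefinerProperty(this="user@host"), returning None if either part is
    # missing.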

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
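
    # For example, a Postgres-style statement such as
    #   CREATE TABLE t2 (LIKE t1 INCLUDING DEFAULTS EXCLUDING CONSTRAINTS)
    # yields a LikeProperty whose expressions are the INCLUDING/EXCLUDING options.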

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )
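
    # Both conflict-handling forms funnel into exp.OnConflict, e.g.
    #   ... ON CONFLICT (id) DO NOTHING            (Postgres)
    #   ... ON DUPLICATE KEY UPDATE x = x + 1      (MySQL, duplicate=True)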

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
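
    # A typical Hive clause parsed by _parse_row_format above:
    #   ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ESCAPED BY '\\' LINES TERMINATED BY '\n'
    # which fills the corresponding RowFormatDelimitedProperty kwargs.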

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
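
    # The multiple-table form mentioned above, e.g.
    #   DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id WHERE t2.flag
    # stores [t1] in `tables`, since no FROM immediately follows DELETE.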

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)
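
    # The leading-FROM form noted above means that, e.g., duckdb's
    #   FROM tbl
    # parses as SELECT * FROM tbl (see the `elif from_` branch).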

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )
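
    # Skeleton of the clause handled below (Snowflake/Oracle style); the PATTERN body is
    # kept verbatim as a variable, since it is a regex-like token stream:
    #   MATCH_RECOGNIZE (
    #     PARTITION BY a ORDER BY b
    #     AFTER MATCH SKIP PAST LAST ROW
    #     PATTERN (x y+)
    #     DEFINE y AS y.b < x.b
    #   )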

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
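
    # Handles lateral-style sources, e.g. SQL Server's CROSS APPLY / OUTER APPLY (OUTER
    # APPLY sets outer=True) and Hive/Spark's LATERAL VIEW explode(xs) t AS x, where the
    # VIEW branch collects the trailing table and column aliases.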

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this
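
    # e.g. CREATE UNIQUE INDEX idx ON t (a DESC, b varchar_pattern_ops): each column goes
    # through _parse_ordered wrapping _parse_opclass, so orderings and Postgres operator
    # classes are both captured.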

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
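
    # e.g. catalog.db.tbl parses into Table(this=tbl, db=db, catalog=catalog); any further
    # dotted parts nest into Dot expressions per the loop above.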

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
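
    # Covers variants such as
    #   SELECT * FROM t TABLESAMPLE (10 PERCENT)
    #   SELECT * FROM t TABLESAMPLE (100 ROWS)
    #   SELECT * FROM t TABLESAMPLE BERNOULLI (10) REPEATABLE (42)
    # and, when as_modifier=True, duckdb's trailing USING SAMPLE clause.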

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )
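
    # e.g. the duckdb form referenced above:
    #   PIVOT cities ON year USING sum(population) GROUP BY country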

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore
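
    # e.g. GROUP BY a, ROLLUP (b), CUBE (c), GROUPING SETS ((a, b), ()) accumulates into
    # the `elements` lists above; MySQL-style WITH ROLLUP / WITH CUBE also land here.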

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
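
    # Example of the implicit null-ordering logic above: for a dialect where
    # NULL_ORDERING == "nulls_are_small", a plain ORDER BY x (ascending) sets
    # nulls_first=True, while ORDER BY x DESC leaves nulls_first=False.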

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)
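
    # Note the MySQL comma form handled above: LIMIT 5, 10 parses with offset=5 and
    # expression=10, since the term before the comma is reinterpreted as the offset.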

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            comments=self._prev.comments,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
a "window side") 3377 unit = None 3378 self._retreat(self._index - 1) 3379 3380 this = exp.Literal.string(parts[0]) 3381 unit = self.expression(exp.Var, this=parts[1]) 3382 3383 return self.expression(exp.Interval, this=this, unit=unit) 3384 3385 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3386 this = self._parse_term() 3387 3388 while True: 3389 if self._match_set(self.BITWISE): 3390 this = self.expression( 3391 self.BITWISE[self._prev.token_type], 3392 this=this, 3393 expression=self._parse_term(), 3394 ) 3395 elif self._match(TokenType.DQMARK): 3396 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3397 elif self._match_pair(TokenType.LT, TokenType.LT): 3398 this = self.expression( 3399 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3400 ) 3401 elif self._match_pair(TokenType.GT, TokenType.GT): 3402 this = self.expression( 3403 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3404 ) 3405 else: 3406 break 3407 3408 return this 3409 3410 def _parse_term(self) -> t.Optional[exp.Expression]: 3411 return self._parse_tokens(self._parse_factor, self.TERM) 3412 3413 def _parse_factor(self) -> t.Optional[exp.Expression]: 3414 if self.EXPONENT: 3415 factor = self._parse_tokens(self._parse_exponent, self.FACTOR) 3416 else: 3417 factor = self._parse_tokens(self._parse_unary, self.FACTOR) 3418 if isinstance(factor, exp.Div): 3419 factor.args["typed"] = self.TYPED_DIVISION 3420 factor.args["safe"] = self.SAFE_DIVISION 3421 return factor 3422 3423 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3424 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3425 3426 def _parse_unary(self) -> t.Optional[exp.Expression]: 3427 if self._match_set(self.UNARY_PARSERS): 3428 return self.UNARY_PARSERS[self._prev.token_type](self) 3429 return self._parse_at_time_zone(self._parse_type()) 3430 3431 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3432 interval = parse_interval and self._parse_interval() 3433 if interval: 3434 return interval 3435 3436 index = self._index 3437 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3438 this = self._parse_column() 3439 3440 if data_type: 3441 if isinstance(this, exp.Literal): 3442 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3443 if parser: 3444 return parser(self, this, data_type) 3445 return self.expression(exp.Cast, this=this, to=data_type) 3446 if not data_type.expressions: 3447 self._retreat(index) 3448 return self._parse_column() 3449 return self._parse_column_ops(data_type) 3450 3451 return this and self._parse_column_ops(this) 3452 3453 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3454 this = self._parse_type() 3455 if not this: 3456 return None 3457 3458 return self.expression( 3459 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3460 ) 3461 3462 def _parse_types( 3463 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3464 ) -> t.Optional[exp.Expression]: 3465 index = self._index 3466 3467 prefix = self._match_text_seq("SYSUDTLIB", ".") 3468 3469 if not self._match_set(self.TYPE_TOKENS): 3470 identifier = allow_identifiers and self._parse_id_var( 3471 any_token=False, tokens=(TokenType.VAR,) 3472 ) 3473 3474 if identifier: 3475 tokens = self._tokenizer.tokenize(identifier.name) 3476 3477 if len(tokens) != 1: 3478 self.raise_error("Unexpected identifier", self._prev) 3479 3480 if tokens[0].token_type in self.TYPE_TOKENS: 3481 self._prev = 

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        if self.EXPONENT:
            factor = self._parse_tokens(self._parse_exponent, self.FACTOR)
        else:
            factor = self._parse_tokens(self._parse_unary, self.FACTOR)
        if isinstance(factor, exp.Div):
            factor.args["typed"] = self.TYPED_DIVISION
            factor.args["safe"] = self.SAFE_DIVISION
        return factor

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
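
    # e.g. DECIMAL(10, 2) parses its params via _parse_type_size, ARRAY<INT> recurses via
    # the nested branch, and a trailing [] (as in INT[]) wraps the result in an ARRAY DataType.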
3593 3594 this = exp.DataType( 3595 this=exp.DataType.Type[type_token.value], 3596 expressions=expressions, 3597 nested=nested, 3598 values=values, 3599 prefix=prefix, 3600 ) 3601 3602 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3603 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3604 3605 return this 3606 3607 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3608 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3609 self._match(TokenType.COLON) 3610 return self._parse_column_def(this) 3611 3612 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3613 if not self._match_text_seq("AT", "TIME", "ZONE"): 3614 return this 3615 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3616 3617 def _parse_column(self) -> t.Optional[exp.Expression]: 3618 this = self._parse_field() 3619 if isinstance(this, exp.Identifier): 3620 this = self.expression(exp.Column, this=this) 3621 elif not this: 3622 return self._parse_bracket(this) 3623 return self._parse_column_ops(this) 3624 3625 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3626 this = self._parse_bracket(this) 3627 3628 while self._match_set(self.COLUMN_OPERATORS): 3629 op_token = self._prev.token_type 3630 op = self.COLUMN_OPERATORS.get(op_token) 3631 3632 if op_token == TokenType.DCOLON: 3633 field = self._parse_types() 3634 if not field: 3635 self.raise_error("Expected type") 3636 elif op and self._curr: 3637 self._advance() 3638 value = self._prev.text 3639 field = ( 3640 exp.Literal.number(value) 3641 if self._prev.token_type == TokenType.NUMBER 3642 else exp.Literal.string(value) 3643 ) 3644 else: 3645 field = self._parse_field(anonymous_func=True, any_token=True) 3646 3647 if isinstance(field, exp.Func): 3648 # bigquery allows function calls like x.y.count(...) 3649 # SAFE.SUBSTR(...) 
3650 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3651 this = self._replace_columns_with_dots(this) 3652 3653 if op: 3654 this = op(self, this, field) 3655 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3656 this = self.expression( 3657 exp.Column, 3658 this=field, 3659 table=this.this, 3660 db=this.args.get("table"), 3661 catalog=this.args.get("db"), 3662 ) 3663 else: 3664 this = self.expression(exp.Dot, this=this, expression=field) 3665 this = self._parse_bracket(this) 3666 return this 3667 3668 def _parse_primary(self) -> t.Optional[exp.Expression]: 3669 if self._match_set(self.PRIMARY_PARSERS): 3670 token_type = self._prev.token_type 3671 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3672 3673 if token_type == TokenType.STRING: 3674 expressions = [primary] 3675 while self._match(TokenType.STRING): 3676 expressions.append(exp.Literal.string(self._prev.text)) 3677 3678 if len(expressions) > 1: 3679 return self.expression(exp.Concat, expressions=expressions) 3680 3681 return primary 3682 3683 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3684 return exp.Literal.number(f"0.{self._prev.text}") 3685 3686 if self._match(TokenType.L_PAREN): 3687 comments = self._prev_comments 3688 query = self._parse_select() 3689 3690 if query: 3691 expressions = [query] 3692 else: 3693 expressions = self._parse_expressions() 3694 3695 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3696 3697 if isinstance(this, exp.Subqueryable): 3698 this = self._parse_set_operations( 3699 self._parse_subquery(this=this, parse_alias=False) 3700 ) 3701 elif len(expressions) > 1: 3702 this = self.expression(exp.Tuple, expressions=expressions) 3703 else: 3704 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3705 3706 if this: 3707 this.add_comments(comments) 3708 3709 self._match_r_paren(expression=this) 3710 return this 3711 3712 return None 3713 3714 def _parse_field( 3715 self, 3716 any_token: bool = False, 3717 tokens: t.Optional[t.Collection[TokenType]] = None, 3718 anonymous_func: bool = False, 3719 ) -> t.Optional[exp.Expression]: 3720 return ( 3721 self._parse_primary() 3722 or self._parse_function(anonymous=anonymous_func) 3723 or self._parse_id_var(any_token=any_token, tokens=tokens) 3724 ) 3725 3726 def _parse_function( 3727 self, 3728 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3729 anonymous: bool = False, 3730 optional_parens: bool = True, 3731 ) -> t.Optional[exp.Expression]: 3732 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3733 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3734 fn_syntax = False 3735 if ( 3736 self._match(TokenType.L_BRACE, advance=False) 3737 and self._next 3738 and self._next.text.upper() == "FN" 3739 ): 3740 self._advance(2) 3741 fn_syntax = True 3742 3743 func = self._parse_function_call( 3744 functions=functions, anonymous=anonymous, optional_parens=optional_parens 3745 ) 3746 3747 if fn_syntax: 3748 self._match(TokenType.R_BRACE) 3749 3750 return func 3751 3752 def _parse_function_call( 3753 self, 3754 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3755 anonymous: bool = False, 3756 optional_parens: bool = True, 3757 ) -> t.Optional[exp.Expression]: 3758 if not self._curr: 3759 return None 3760 3761 token_type = self._curr.token_type 3762 this = self._curr.text 3763 upper = this.upper() 3764 3765 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3766 if 
optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3767 self._advance() 3768 return parser(self) 3769 3770 if not self._next or self._next.token_type != TokenType.L_PAREN: 3771 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3772 self._advance() 3773 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3774 3775 return None 3776 3777 if token_type not in self.FUNC_TOKENS: 3778 return None 3779 3780 self._advance(2) 3781 3782 parser = self.FUNCTION_PARSERS.get(upper) 3783 if parser and not anonymous: 3784 this = parser(self) 3785 else: 3786 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3787 3788 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3789 this = self.expression(subquery_predicate, this=self._parse_select()) 3790 self._match_r_paren() 3791 return this 3792 3793 if functions is None: 3794 functions = self.FUNCTIONS 3795 3796 function = functions.get(upper) 3797 3798 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3799 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3800 3801 if function and not anonymous: 3802 func = self.validate_expression(function(args), args) 3803 if not self.NORMALIZE_FUNCTIONS: 3804 func.meta["name"] = this 3805 this = func 3806 else: 3807 this = self.expression(exp.Anonymous, this=this, expressions=args) 3808 3809 self._match_r_paren(this) 3810 return self._parse_window(this) 3811 3812 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3813 return self._parse_column_def(self._parse_id_var()) 3814 3815 def _parse_user_defined_function( 3816 self, kind: t.Optional[TokenType] = None 3817 ) -> t.Optional[exp.Expression]: 3818 this = self._parse_id_var() 3819 3820 while self._match(TokenType.DOT): 3821 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3822 3823 if not self._match(TokenType.L_PAREN): 3824 return this 3825 3826 expressions = self._parse_csv(self._parse_function_parameter) 3827 self._match_r_paren() 3828 return self.expression( 3829 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3830 ) 3831 3832 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3833 literal = self._parse_primary() 3834 if literal: 3835 return self.expression(exp.Introducer, this=token.text, expression=literal) 3836 3837 return self.expression(exp.Identifier, this=token.text) 3838 3839 def _parse_session_parameter(self) -> exp.SessionParameter: 3840 kind = None 3841 this = self._parse_id_var() or self._parse_primary() 3842 3843 if this and self._match(TokenType.DOT): 3844 kind = this.name 3845 this = self._parse_var() or self._parse_primary() 3846 3847 return self.expression(exp.SessionParameter, this=this, kind=kind) 3848 3849 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3850 index = self._index 3851 3852 if self._match(TokenType.L_PAREN): 3853 expressions = t.cast( 3854 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3855 ) 3856 3857 if not self._match(TokenType.R_PAREN): 3858 self._retreat(index) 3859 else: 3860 expressions = [self._parse_id_var()] 3861 3862 if self._match_set(self.LAMBDAS): 3863 return self.LAMBDAS[self._prev.token_type](self, expressions) 3864 3865 self._retreat(index) 3866 3867 this: t.Optional[exp.Expression] 3868 3869 if self._match(TokenType.DISTINCT): 3870 this = self.expression( 3871 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3872 ) 3873 else: 3874 this = 
self._parse_select_or_expression(alias=alias) 3875 3876 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3877 3878 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3879 index = self._index 3880 3881 if not self.errors: 3882 try: 3883 if self._parse_select(nested=True): 3884 return this 3885 except ParseError: 3886 pass 3887 finally: 3888 self.errors.clear() 3889 self._retreat(index) 3890 3891 if not self._match(TokenType.L_PAREN): 3892 return this 3893 3894 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3895 3896 self._match_r_paren() 3897 return self.expression(exp.Schema, this=this, expressions=args) 3898 3899 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3900 return self._parse_column_def(self._parse_field(any_token=True)) 3901 3902 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3903 # column defs are not really columns, they're identifiers 3904 if isinstance(this, exp.Column): 3905 this = this.this 3906 3907 kind = self._parse_types(schema=True) 3908 3909 if self._match_text_seq("FOR", "ORDINALITY"): 3910 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3911 3912 constraints: t.List[exp.Expression] = [] 3913 3914 if not kind and self._match(TokenType.ALIAS): 3915 constraints.append( 3916 self.expression( 3917 exp.ComputedColumnConstraint, 3918 this=self._parse_conjunction(), 3919 persisted=self._match_text_seq("PERSISTED"), 3920 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3921 ) 3922 ) 3923 3924 while True: 3925 constraint = self._parse_column_constraint() 3926 if not constraint: 3927 break 3928 constraints.append(constraint) 3929 3930 if not kind and not constraints: 3931 return this 3932 3933 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3934 3935 def _parse_auto_increment( 3936 self, 3937 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3938 start = None 3939 increment = None 3940 3941 if self._match(TokenType.L_PAREN, advance=False): 3942 args = self._parse_wrapped_csv(self._parse_bitwise) 3943 start = seq_get(args, 0) 3944 increment = seq_get(args, 1) 3945 elif self._match_text_seq("START"): 3946 start = self._parse_bitwise() 3947 self._match_text_seq("INCREMENT") 3948 increment = self._parse_bitwise() 3949 3950 if start and increment: 3951 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3952 3953 return exp.AutoIncrementColumnConstraint() 3954 3955 def _parse_compress(self) -> exp.CompressColumnConstraint: 3956 if self._match(TokenType.L_PAREN, advance=False): 3957 return self.expression( 3958 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3959 ) 3960 3961 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3962 3963 def _parse_generated_as_identity( 3964 self, 3965 ) -> ( 3966 exp.GeneratedAsIdentityColumnConstraint 3967 | exp.ComputedColumnConstraint 3968 | exp.GeneratedAsRowColumnConstraint 3969 ): 3970 if self._match_text_seq("BY", "DEFAULT"): 3971 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3972 this = self.expression( 3973 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3974 ) 3975 else: 3976 self._match_text_seq("ALWAYS") 3977 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3978 3979 self._match(TokenType.ALIAS) 3980 3981 if 
self._match_text_seq("ROW"): 3982 start = self._match_text_seq("START") 3983 if not start: 3984 self._match(TokenType.END) 3985 hidden = self._match_text_seq("HIDDEN") 3986 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 3987 3988 identity = self._match_text_seq("IDENTITY") 3989 3990 if self._match(TokenType.L_PAREN): 3991 if self._match(TokenType.START_WITH): 3992 this.set("start", self._parse_bitwise()) 3993 if self._match_text_seq("INCREMENT", "BY"): 3994 this.set("increment", self._parse_bitwise()) 3995 if self._match_text_seq("MINVALUE"): 3996 this.set("minvalue", self._parse_bitwise()) 3997 if self._match_text_seq("MAXVALUE"): 3998 this.set("maxvalue", self._parse_bitwise()) 3999 4000 if self._match_text_seq("CYCLE"): 4001 this.set("cycle", True) 4002 elif self._match_text_seq("NO", "CYCLE"): 4003 this.set("cycle", False) 4004 4005 if not identity: 4006 this.set("expression", self._parse_bitwise()) 4007 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4008 args = self._parse_csv(self._parse_bitwise) 4009 this.set("start", seq_get(args, 0)) 4010 this.set("increment", seq_get(args, 1)) 4011 4012 self._match_r_paren() 4013 4014 return this 4015 4016 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4017 self._match_text_seq("LENGTH") 4018 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4019 4020 def _parse_not_constraint( 4021 self, 4022 ) -> t.Optional[exp.Expression]: 4023 if self._match_text_seq("NULL"): 4024 return self.expression(exp.NotNullColumnConstraint) 4025 if self._match_text_seq("CASESPECIFIC"): 4026 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4027 if self._match_text_seq("FOR", "REPLICATION"): 4028 return self.expression(exp.NotForReplicationColumnConstraint) 4029 return None 4030 4031 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4032 if self._match(TokenType.CONSTRAINT): 4033 this = self._parse_id_var() 4034 else: 4035 this = None 4036 4037 if self._match_texts(self.CONSTRAINT_PARSERS): 4038 return self.expression( 4039 exp.ColumnConstraint, 4040 this=this, 4041 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4042 ) 4043 4044 return this 4045 4046 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4047 if not self._match(TokenType.CONSTRAINT): 4048 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4049 4050 this = self._parse_id_var() 4051 expressions = [] 4052 4053 while True: 4054 constraint = self._parse_unnamed_constraint() or self._parse_function() 4055 if not constraint: 4056 break 4057 expressions.append(constraint) 4058 4059 return self.expression(exp.Constraint, this=this, expressions=expressions) 4060 4061 def _parse_unnamed_constraint( 4062 self, constraints: t.Optional[t.Collection[str]] = None 4063 ) -> t.Optional[exp.Expression]: 4064 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4065 constraints or self.CONSTRAINT_PARSERS 4066 ): 4067 return None 4068 4069 constraint = self._prev.text.upper() 4070 if constraint not in self.CONSTRAINT_PARSERS: 4071 self.raise_error(f"No parser found for schema constraint {constraint}.") 4072 4073 return self.CONSTRAINT_PARSERS[constraint](self) 4074 4075 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4076 self._match_text_seq("KEY") 4077 return self.expression( 4078 exp.UniqueColumnConstraint, 4079 this=self._parse_schema(self._parse_id_var(any_token=False)), 4080 
index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4081 ) 4082 4083 def _parse_key_constraint_options(self) -> t.List[str]: 4084 options = [] 4085 while True: 4086 if not self._curr: 4087 break 4088 4089 if self._match(TokenType.ON): 4090 action = None 4091 on = self._advance_any() and self._prev.text 4092 4093 if self._match_text_seq("NO", "ACTION"): 4094 action = "NO ACTION" 4095 elif self._match_text_seq("CASCADE"): 4096 action = "CASCADE" 4097 elif self._match_text_seq("RESTRICT"): 4098 action = "RESTRICT" 4099 elif self._match_pair(TokenType.SET, TokenType.NULL): 4100 action = "SET NULL" 4101 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4102 action = "SET DEFAULT" 4103 else: 4104 self.raise_error("Invalid key constraint") 4105 4106 options.append(f"ON {on} {action}") 4107 elif self._match_text_seq("NOT", "ENFORCED"): 4108 options.append("NOT ENFORCED") 4109 elif self._match_text_seq("DEFERRABLE"): 4110 options.append("DEFERRABLE") 4111 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4112 options.append("INITIALLY DEFERRED") 4113 elif self._match_text_seq("NORELY"): 4114 options.append("NORELY") 4115 elif self._match_text_seq("MATCH", "FULL"): 4116 options.append("MATCH FULL") 4117 else: 4118 break 4119 4120 return options 4121 4122 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4123 if match and not self._match(TokenType.REFERENCES): 4124 return None 4125 4126 expressions = None 4127 this = self._parse_table(schema=True) 4128 options = self._parse_key_constraint_options() 4129 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4130 4131 def _parse_foreign_key(self) -> exp.ForeignKey: 4132 expressions = self._parse_wrapped_id_vars() 4133 reference = self._parse_references() 4134 options = {} 4135 4136 while self._match(TokenType.ON): 4137 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4138 self.raise_error("Expected DELETE or UPDATE") 4139 4140 kind = self._prev.text.lower() 4141 4142 if self._match_text_seq("NO", "ACTION"): 4143 action = "NO ACTION" 4144 elif self._match(TokenType.SET): 4145 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4146 action = "SET " + self._prev.text.upper() 4147 else: 4148 self._advance() 4149 action = self._prev.text.upper() 4150 4151 options[kind] = action 4152 4153 return self.expression( 4154 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4155 ) 4156 4157 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4158 return self._parse_field() 4159 4160 def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint: 4161 self._match(TokenType.TIMESTAMP_SNAPSHOT) 4162 4163 id_vars = self._parse_wrapped_id_vars() 4164 return self.expression( 4165 exp.PeriodForSystemTimeConstraint, 4166 this=seq_get(id_vars, 0), 4167 expression=seq_get(id_vars, 1), 4168 ) 4169 4170 def _parse_primary_key( 4171 self, wrapped_optional: bool = False, in_props: bool = False 4172 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4173 desc = ( 4174 self._match_set((TokenType.ASC, TokenType.DESC)) 4175 and self._prev.token_type == TokenType.DESC 4176 ) 4177 4178 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4179 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4180 4181 expressions = self._parse_wrapped_csv( 4182 self._parse_primary_key_part, optional=wrapped_optional 4183 ) 4184 options = self._parse_key_constraint_options() 4185 
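        # Illustrative aside (not part of the upstream source): a bare column-level
        # PRIMARY KEY takes the early PrimaryKeyColumnConstraint return above, while
        # a table-level constraint such as PRIMARY KEY (a, b) reaches this point and
        # becomes an exp.PrimaryKey whose expressions are the individual key parts.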
return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4186 4187 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4188 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4189 return this 4190 4191 bracket_kind = self._prev.token_type 4192 4193 if self._match(TokenType.COLON): 4194 expressions: t.List[exp.Expression] = [ 4195 self.expression(exp.Slice, expression=self._parse_conjunction()) 4196 ] 4197 else: 4198 expressions = self._parse_csv( 4199 lambda: self._parse_slice( 4200 self._parse_alias(self._parse_conjunction(), explicit=True) 4201 ) 4202 ) 4203 4204 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4205 self.raise_error("Expected ]") 4206 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4207 self.raise_error("Expected }") 4208 4209 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4210 if bracket_kind == TokenType.L_BRACE: 4211 this = self.expression(exp.Struct, expressions=expressions) 4212 elif not this or this.name.upper() == "ARRAY": 4213 this = self.expression(exp.Array, expressions=expressions) 4214 else: 4215 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4216 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4217 4218 self._add_comments(this) 4219 return self._parse_bracket(this) 4220 4221 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4222 if self._match(TokenType.COLON): 4223 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4224 return this 4225 4226 def _parse_case(self) -> t.Optional[exp.Expression]: 4227 ifs = [] 4228 default = None 4229 4230 comments = self._prev_comments 4231 expression = self._parse_conjunction() 4232 4233 while self._match(TokenType.WHEN): 4234 this = self._parse_conjunction() 4235 self._match(TokenType.THEN) 4236 then = self._parse_conjunction() 4237 ifs.append(self.expression(exp.If, this=this, true=then)) 4238 4239 if self._match(TokenType.ELSE): 4240 default = self._parse_conjunction() 4241 4242 if not self._match(TokenType.END): 4243 self.raise_error("Expected END after CASE", self._prev) 4244 4245 return self._parse_window( 4246 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4247 ) 4248 4249 def _parse_if(self) -> t.Optional[exp.Expression]: 4250 if self._match(TokenType.L_PAREN): 4251 args = self._parse_csv(self._parse_conjunction) 4252 this = self.validate_expression(exp.If.from_arg_list(args), args) 4253 self._match_r_paren() 4254 else: 4255 index = self._index - 1 4256 condition = self._parse_conjunction() 4257 4258 if not condition: 4259 self._retreat(index) 4260 return None 4261 4262 self._match(TokenType.THEN) 4263 true = self._parse_conjunction() 4264 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4265 self._match(TokenType.END) 4266 this = self.expression(exp.If, this=condition, true=true, false=false) 4267 4268 return self._parse_window(this) 4269 4270 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4271 if not self._match_text_seq("VALUE", "FOR"): 4272 self._retreat(self._index - 1) 4273 return None 4274 4275 return self.expression( 4276 exp.NextValueFor, 4277 this=self._parse_column(), 4278 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4279 ) 4280 4281 def _parse_extract(self) -> exp.Extract: 4282 this = self._parse_function() or 
self._parse_var() or self._parse_type() 4283 4284 if self._match(TokenType.FROM): 4285 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4286 4287 if not self._match(TokenType.COMMA): 4288 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4289 4290 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4291 4292 def _parse_any_value(self) -> exp.AnyValue: 4293 this = self._parse_lambda() 4294 is_max = None 4295 having = None 4296 4297 if self._match(TokenType.HAVING): 4298 self._match_texts(("MAX", "MIN")) 4299 is_max = self._prev.text == "MAX" 4300 having = self._parse_column() 4301 4302 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4303 4304 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4305 this = self._parse_conjunction() 4306 4307 if not self._match(TokenType.ALIAS): 4308 if self._match(TokenType.COMMA): 4309 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4310 4311 self.raise_error("Expected AS after CAST") 4312 4313 fmt = None 4314 to = self._parse_types() 4315 4316 if self._match(TokenType.FORMAT): 4317 fmt_string = self._parse_string() 4318 fmt = self._parse_at_time_zone(fmt_string) 4319 4320 if not to: 4321 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4322 if to.this in exp.DataType.TEMPORAL_TYPES: 4323 this = self.expression( 4324 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4325 this=this, 4326 format=exp.Literal.string( 4327 format_time( 4328 fmt_string.this if fmt_string else "", 4329 self.FORMAT_MAPPING or self.TIME_MAPPING, 4330 self.FORMAT_TRIE or self.TIME_TRIE, 4331 ) 4332 ), 4333 ) 4334 4335 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4336 this.set("zone", fmt.args["zone"]) 4337 return this 4338 elif not to: 4339 self.raise_error("Expected TYPE after CAST") 4340 elif isinstance(to, exp.Identifier): 4341 to = exp.DataType.build(to.name, udt=True) 4342 elif to.this == exp.DataType.Type.CHAR: 4343 if self._match(TokenType.CHARACTER_SET): 4344 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4345 4346 return self.expression( 4347 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4348 ) 4349 4350 def _parse_concat(self) -> t.Optional[exp.Expression]: 4351 args = self._parse_csv(self._parse_conjunction) 4352 if self.CONCAT_NULL_OUTPUTS_STRING: 4353 args = self._ensure_string_if_null(args) 4354 4355 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4356 # we find such a call we replace it with its argument. 
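        # For instance (illustrative): CONCAT('a') collapses to the literal 'a'
        # below, while CONCAT(a, b) is kept as a Concat or SafeConcat node,
        # depending on STRICT_STRING_CONCAT.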
4357 if len(args) == 1: 4358 return args[0] 4359 4360 return self.expression( 4361 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4362 ) 4363 4364 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4365 args = self._parse_csv(self._parse_conjunction) 4366 if len(args) < 2: 4367 return self.expression(exp.ConcatWs, expressions=args) 4368 delim, *values = args 4369 if self.CONCAT_NULL_OUTPUTS_STRING: 4370 values = self._ensure_string_if_null(values) 4371 4372 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4373 4374 def _parse_string_agg(self) -> exp.Expression: 4375 if self._match(TokenType.DISTINCT): 4376 args: t.List[t.Optional[exp.Expression]] = [ 4377 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4378 ] 4379 if self._match(TokenType.COMMA): 4380 args.extend(self._parse_csv(self._parse_conjunction)) 4381 else: 4382 args = self._parse_csv(self._parse_conjunction) # type: ignore 4383 4384 index = self._index 4385 if not self._match(TokenType.R_PAREN) and args: 4386 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4387 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4388 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4389 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4390 4391 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4392 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4393 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4394 if not self._match_text_seq("WITHIN", "GROUP"): 4395 self._retreat(index) 4396 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4397 4398 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4399 order = self._parse_order(this=seq_get(args, 0)) 4400 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4401 4402 def _parse_convert( 4403 self, strict: bool, safe: t.Optional[bool] = None 4404 ) -> t.Optional[exp.Expression]: 4405 this = self._parse_bitwise() 4406 4407 if self._match(TokenType.USING): 4408 to: t.Optional[exp.Expression] = self.expression( 4409 exp.CharacterSet, this=self._parse_var() 4410 ) 4411 elif self._match(TokenType.COMMA): 4412 to = self._parse_types() 4413 else: 4414 to = None 4415 4416 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4417 4418 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4419 """ 4420 There are generally two variants of the DECODE function: 4421 4422 - DECODE(bin, charset) 4423 - DECODE(expression, search, result [, search, result] ... [, default]) 4424 4425 The second variant will always be parsed into a CASE expression. Note that NULL 4426 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4427 instead of relying on pattern matching. 
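        For example (an illustrative case, not from the upstream docstring),

            DECODE(x, 1, 'one', 2, 'two', 'other')

        is parsed as the equivalent of

            CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END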
4428 """ 4429 args = self._parse_csv(self._parse_conjunction) 4430 4431 if len(args) < 3: 4432 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4433 4434 expression, *expressions = args 4435 if not expression: 4436 return None 4437 4438 ifs = [] 4439 for search, result in zip(expressions[::2], expressions[1::2]): 4440 if not search or not result: 4441 return None 4442 4443 if isinstance(search, exp.Literal): 4444 ifs.append( 4445 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4446 ) 4447 elif isinstance(search, exp.Null): 4448 ifs.append( 4449 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4450 ) 4451 else: 4452 cond = exp.or_( 4453 exp.EQ(this=expression.copy(), expression=search), 4454 exp.and_( 4455 exp.Is(this=expression.copy(), expression=exp.Null()), 4456 exp.Is(this=search.copy(), expression=exp.Null()), 4457 copy=False, 4458 ), 4459 copy=False, 4460 ) 4461 ifs.append(exp.If(this=cond, true=result)) 4462 4463 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4464 4465 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4466 self._match_text_seq("KEY") 4467 key = self._parse_column() 4468 self._match_set((TokenType.COLON, TokenType.COMMA)) 4469 self._match_text_seq("VALUE") 4470 value = self._parse_bitwise() 4471 4472 if not key and not value: 4473 return None 4474 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4475 4476 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4477 if not this or not self._match_text_seq("FORMAT", "JSON"): 4478 return this 4479 4480 return self.expression(exp.FormatJson, this=this) 4481 4482 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4483 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4484 for value in values: 4485 if self._match_text_seq(value, "ON", on): 4486 return f"{value} ON {on}" 4487 4488 return None 4489 4490 def _parse_json_object(self) -> exp.JSONObject: 4491 star = self._parse_star() 4492 expressions = ( 4493 [star] 4494 if star 4495 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4496 ) 4497 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4498 4499 unique_keys = None 4500 if self._match_text_seq("WITH", "UNIQUE"): 4501 unique_keys = True 4502 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4503 unique_keys = False 4504 4505 self._match_text_seq("KEYS") 4506 4507 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4508 self._parse_type() 4509 ) 4510 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4511 4512 return self.expression( 4513 exp.JSONObject, 4514 expressions=expressions, 4515 null_handling=null_handling, 4516 unique_keys=unique_keys, 4517 return_type=return_type, 4518 encoding=encoding, 4519 ) 4520 4521 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4522 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4523 if not self._match_text_seq("NESTED"): 4524 this = self._parse_id_var() 4525 kind = self._parse_types(allow_identifiers=False) 4526 nested = None 4527 else: 4528 this = None 4529 kind = None 4530 nested = True 4531 4532 path = self._match_text_seq("PATH") and self._parse_string() 4533 nested_schema = nested and self._parse_json_schema() 4534 4535 return self.expression( 4536 exp.JSONColumnDef, 4537 this=this, 4538 kind=kind, 4539 path=path, 4540 nested_schema=nested_schema, 4541 ) 4542 4543 def _parse_json_schema(self) -> exp.JSONSchema: 4544 self._match_text_seq("COLUMNS") 4545 return self.expression( 4546 exp.JSONSchema, 4547 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4548 ) 4549 4550 def _parse_json_table(self) -> exp.JSONTable: 4551 this = self._parse_format_json(self._parse_bitwise()) 4552 path = self._match(TokenType.COMMA) and self._parse_string() 4553 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4554 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4555 schema = self._parse_json_schema() 4556 4557 return exp.JSONTable( 4558 this=this, 4559 schema=schema, 4560 path=path, 4561 error_handling=error_handling, 4562 empty_handling=empty_handling, 4563 ) 4564 4565 def _parse_logarithm(self) -> exp.Func: 4566 # Default argument order is base, expression 4567 args = self._parse_csv(self._parse_range) 4568 4569 if len(args) > 1: 4570 if not self.LOG_BASE_FIRST: 4571 args.reverse() 4572 return exp.Log.from_arg_list(args) 4573 4574 return self.expression( 4575 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4576 ) 4577 4578 def _parse_match_against(self) -> exp.MatchAgainst: 4579 expressions = self._parse_csv(self._parse_column) 4580 4581 self._match_text_seq(")", "AGAINST", "(") 4582 4583 this = self._parse_string() 4584 4585 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4586 modifier = "IN NATURAL LANGUAGE MODE" 4587 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4588 modifier = f"{modifier} WITH QUERY EXPANSION" 4589 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4590 modifier = "IN BOOLEAN MODE" 4591 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4592 modifier = "WITH QUERY EXPANSION" 4593 else: 4594 modifier = None 4595 4596 return 
self.expression( 4597 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4598 ) 4599 4600 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4601 def _parse_open_json(self) -> exp.OpenJSON: 4602 this = self._parse_bitwise() 4603 path = self._match(TokenType.COMMA) and self._parse_string() 4604 4605 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4606 this = self._parse_field(any_token=True) 4607 kind = self._parse_types() 4608 path = self._parse_string() 4609 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4610 4611 return self.expression( 4612 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4613 ) 4614 4615 expressions = None 4616 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4617 self._match_l_paren() 4618 expressions = self._parse_csv(_parse_open_json_column_def) 4619 4620 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4621 4622 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4623 args = self._parse_csv(self._parse_bitwise) 4624 4625 if self._match(TokenType.IN): 4626 return self.expression( 4627 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4628 ) 4629 4630 if haystack_first: 4631 haystack = seq_get(args, 0) 4632 needle = seq_get(args, 1) 4633 else: 4634 needle = seq_get(args, 0) 4635 haystack = seq_get(args, 1) 4636 4637 return self.expression( 4638 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4639 ) 4640 4641 def _parse_predict(self) -> exp.Predict: 4642 self._match_text_seq("MODEL") 4643 this = self._parse_table() 4644 4645 self._match(TokenType.COMMA) 4646 self._match_text_seq("TABLE") 4647 4648 return self.expression( 4649 exp.Predict, 4650 this=this, 4651 expression=self._parse_table(), 4652 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4653 ) 4654 4655 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4656 args = self._parse_csv(self._parse_table) 4657 return exp.JoinHint(this=func_name.upper(), expressions=args) 4658 4659 def _parse_substring(self) -> exp.Substring: 4660 # Postgres supports the form: substring(string [from int] [for int]) 4661 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4662 4663 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4664 4665 if self._match(TokenType.FROM): 4666 args.append(self._parse_bitwise()) 4667 if self._match(TokenType.FOR): 4668 args.append(self._parse_bitwise()) 4669 4670 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4671 4672 def _parse_trim(self) -> exp.Trim: 4673 # https://www.w3resource.com/sql/character-functions/trim.php 4674 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4675 4676 position = None 4677 collation = None 4678 expression = None 4679 4680 if self._match_texts(self.TRIM_TYPES): 4681 position = self._prev.text.upper() 4682 4683 this = self._parse_bitwise() 4684 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4685 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4686 expression = self._parse_bitwise() 4687 4688 if invert_order: 4689 this, expression = expression, this 4690 4691 if self._match(TokenType.COLLATE): 4692 collation = self._parse_bitwise() 4693 4694 return self.expression( 4695 exp.Trim, this=this, position=position, expression=expression, collation=collation 4696 ) 4697 4698 def 
_parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4699 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4700 4701 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4702 return self._parse_window(self._parse_id_var(), alias=True) 4703 4704 def _parse_respect_or_ignore_nulls( 4705 self, this: t.Optional[exp.Expression] 4706 ) -> t.Optional[exp.Expression]: 4707 if self._match_text_seq("IGNORE", "NULLS"): 4708 return self.expression(exp.IgnoreNulls, this=this) 4709 if self._match_text_seq("RESPECT", "NULLS"): 4710 return self.expression(exp.RespectNulls, this=this) 4711 return this 4712 4713 def _parse_window( 4714 self, this: t.Optional[exp.Expression], alias: bool = False 4715 ) -> t.Optional[exp.Expression]: 4716 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4717 self._match(TokenType.WHERE) 4718 this = self.expression( 4719 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4720 ) 4721 self._match_r_paren() 4722 4723 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4724 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4725 if self._match_text_seq("WITHIN", "GROUP"): 4726 order = self._parse_wrapped(self._parse_order) 4727 this = self.expression(exp.WithinGroup, this=this, expression=order) 4728 4729 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4730 # Some dialects choose to implement it and some do not. 4731 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4732 4733 # There is some code above in _parse_lambda that handles 4734 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4735 4736 # The code below handles 4737 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4738 4739 # Oracle allows both formats 4740 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4741 # and Snowflake chose to do the same for familiarity 4742 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4743 this = self._parse_respect_or_ignore_nulls(this) 4744 4745 # bigquery select from window x AS (partition by ...) 
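        # Illustrative aside (not part of the upstream source): for a named window,
        # e.g. SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y), this method
        # is re-entered with alias=True via _parse_named_window, so the branch below
        # consumes the AS token instead of an OVER keyword.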
4746 if alias: 4747 over = None 4748 self._match(TokenType.ALIAS) 4749 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4750 return this 4751 else: 4752 over = self._prev.text.upper() 4753 4754 if not self._match(TokenType.L_PAREN): 4755 return self.expression( 4756 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4757 ) 4758 4759 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4760 4761 first = self._match(TokenType.FIRST) 4762 if self._match_text_seq("LAST"): 4763 first = False 4764 4765 partition, order = self._parse_partition_and_order() 4766 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4767 4768 if kind: 4769 self._match(TokenType.BETWEEN) 4770 start = self._parse_window_spec() 4771 self._match(TokenType.AND) 4772 end = self._parse_window_spec() 4773 4774 spec = self.expression( 4775 exp.WindowSpec, 4776 kind=kind, 4777 start=start["value"], 4778 start_side=start["side"], 4779 end=end["value"], 4780 end_side=end["side"], 4781 ) 4782 else: 4783 spec = None 4784 4785 self._match_r_paren() 4786 4787 window = self.expression( 4788 exp.Window, 4789 this=this, 4790 partition_by=partition, 4791 order=order, 4792 spec=spec, 4793 alias=window_alias, 4794 over=over, 4795 first=first, 4796 ) 4797 4798 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4799 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4800 return self._parse_window(window, alias=alias) 4801 4802 return window 4803 4804 def _parse_partition_and_order( 4805 self, 4806 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4807 return self._parse_partition_by(), self._parse_order() 4808 4809 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4810 self._match(TokenType.BETWEEN) 4811 4812 return { 4813 "value": ( 4814 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4815 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4816 or self._parse_bitwise() 4817 ), 4818 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4819 } 4820 4821 def _parse_alias( 4822 self, this: t.Optional[exp.Expression], explicit: bool = False 4823 ) -> t.Optional[exp.Expression]: 4824 any_token = self._match(TokenType.ALIAS) 4825 4826 if explicit and not any_token: 4827 return this 4828 4829 if self._match(TokenType.L_PAREN): 4830 aliases = self.expression( 4831 exp.Aliases, 4832 this=this, 4833 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4834 ) 4835 self._match_r_paren(aliases) 4836 return aliases 4837 4838 alias = self._parse_id_var(any_token) 4839 4840 if alias: 4841 return self.expression(exp.Alias, this=this, alias=alias) 4842 4843 return this 4844 4845 def _parse_id_var( 4846 self, 4847 any_token: bool = True, 4848 tokens: t.Optional[t.Collection[TokenType]] = None, 4849 ) -> t.Optional[exp.Expression]: 4850 identifier = self._parse_identifier() 4851 4852 if identifier: 4853 return identifier 4854 4855 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4856 quoted = self._prev.token_type == TokenType.STRING 4857 return exp.Identifier(this=self._prev.text, quoted=quoted) 4858 4859 return None 4860 4861 def _parse_string(self) -> t.Optional[exp.Expression]: 4862 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 4863 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 4864 return self._parse_placeholder() 4865 4866 def _parse_string_as_identifier(self) -> 
t.Optional[exp.Identifier]: 4867 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4868 4869 def _parse_number(self) -> t.Optional[exp.Expression]: 4870 if self._match(TokenType.NUMBER): 4871 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4872 return self._parse_placeholder() 4873 4874 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4875 if self._match(TokenType.IDENTIFIER): 4876 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4877 return self._parse_placeholder() 4878 4879 def _parse_var( 4880 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4881 ) -> t.Optional[exp.Expression]: 4882 if ( 4883 (any_token and self._advance_any()) 4884 or self._match(TokenType.VAR) 4885 or (self._match_set(tokens) if tokens else False) 4886 ): 4887 return self.expression(exp.Var, this=self._prev.text) 4888 return self._parse_placeholder() 4889 4890 def _advance_any(self) -> t.Optional[Token]: 4891 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4892 self._advance() 4893 return self._prev 4894 return None 4895 4896 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4897 return self._parse_var() or self._parse_string() 4898 4899 def _parse_null(self) -> t.Optional[exp.Expression]: 4900 if self._match_set(self.NULL_TOKENS): 4901 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4902 return self._parse_placeholder() 4903 4904 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4905 if self._match(TokenType.TRUE): 4906 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4907 if self._match(TokenType.FALSE): 4908 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4909 return self._parse_placeholder() 4910 4911 def _parse_star(self) -> t.Optional[exp.Expression]: 4912 if self._match(TokenType.STAR): 4913 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4914 return self._parse_placeholder() 4915 4916 def _parse_parameter(self) -> exp.Parameter: 4917 def _parse_parameter_part() -> t.Optional[exp.Expression]: 4918 return ( 4919 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 4920 ) 4921 4922 self._match(TokenType.L_BRACE) 4923 this = _parse_parameter_part() 4924 expression = self._match(TokenType.COLON) and _parse_parameter_part() 4925 self._match(TokenType.R_BRACE) 4926 4927 return self.expression(exp.Parameter, this=this, expression=expression) 4928 4929 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4930 if self._match_set(self.PLACEHOLDER_PARSERS): 4931 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4932 if placeholder: 4933 return placeholder 4934 self._advance(-1) 4935 return None 4936 4937 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4938 if not self._match(TokenType.EXCEPT): 4939 return None 4940 if self._match(TokenType.L_PAREN, advance=False): 4941 return self._parse_wrapped_csv(self._parse_column) 4942 4943 except_column = self._parse_column() 4944 return [except_column] if except_column else None 4945 4946 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4947 if not self._match(TokenType.REPLACE): 4948 return None 4949 if self._match(TokenType.L_PAREN, advance=False): 4950 return self._parse_wrapped_csv(self._parse_expression) 4951 4952 replace_expression = self._parse_expression() 4953 return [replace_expression] if replace_expression else None 4954 4955 def _parse_csv( 4956 self, parse_method: 
t.Callable, sep: TokenType = TokenType.COMMA 4957 ) -> t.List[exp.Expression]: 4958 parse_result = parse_method() 4959 items = [parse_result] if parse_result is not None else [] 4960 4961 while self._match(sep): 4962 self._add_comments(parse_result) 4963 parse_result = parse_method() 4964 if parse_result is not None: 4965 items.append(parse_result) 4966 4967 return items 4968 4969 def _parse_tokens( 4970 self, parse_method: t.Callable, expressions: t.Dict 4971 ) -> t.Optional[exp.Expression]: 4972 this = parse_method() 4973 4974 while self._match_set(expressions): 4975 this = self.expression( 4976 expressions[self._prev.token_type], 4977 this=this, 4978 comments=self._prev_comments, 4979 expression=parse_method(), 4980 ) 4981 4982 return this 4983 4984 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4985 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4986 4987 def _parse_wrapped_csv( 4988 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4989 ) -> t.List[exp.Expression]: 4990 return self._parse_wrapped( 4991 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4992 ) 4993 4994 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4995 wrapped = self._match(TokenType.L_PAREN) 4996 if not wrapped and not optional: 4997 self.raise_error("Expecting (") 4998 parse_result = parse_method() 4999 if wrapped: 5000 self._match_r_paren() 5001 return parse_result 5002 5003 def _parse_expressions(self) -> t.List[exp.Expression]: 5004 return self._parse_csv(self._parse_expression) 5005 5006 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5007 return self._parse_select() or self._parse_set_operations( 5008 self._parse_expression() if alias else self._parse_conjunction() 5009 ) 5010 5011 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5012 return self._parse_query_modifiers( 5013 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5014 ) 5015 5016 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5017 this = None 5018 if self._match_texts(self.TRANSACTION_KIND): 5019 this = self._prev.text 5020 5021 self._match_texts(("TRANSACTION", "WORK")) 5022 5023 modes = [] 5024 while True: 5025 mode = [] 5026 while self._match(TokenType.VAR): 5027 mode.append(self._prev.text) 5028 5029 if mode: 5030 modes.append(" ".join(mode)) 5031 if not self._match(TokenType.COMMA): 5032 break 5033 5034 return self.expression(exp.Transaction, this=this, modes=modes) 5035 5036 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5037 chain = None 5038 savepoint = None 5039 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5040 5041 self._match_texts(("TRANSACTION", "WORK")) 5042 5043 if self._match_text_seq("TO"): 5044 self._match_text_seq("SAVEPOINT") 5045 savepoint = self._parse_id_var() 5046 5047 if self._match(TokenType.AND): 5048 chain = not self._match_text_seq("NO") 5049 self._match_text_seq("CHAIN") 5050 5051 if is_rollback: 5052 return self.expression(exp.Rollback, savepoint=savepoint) 5053 5054 return self.expression(exp.Commit, chain=chain) 5055 5056 def _parse_refresh(self) -> exp.Refresh: 5057 self._match(TokenType.TABLE) 5058 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5059 5060 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5061 if not self._match_text_seq("ADD"): 5062 return None 5063 5064 
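        # Illustrative aside (not part of the upstream source): for a statement like
        # ALTER TABLE t ADD COLUMN IF NOT EXISTS c INT FIRST, the code below consumes
        # the optional COLUMN keyword, the IF NOT EXISTS guard, the column definition
        # itself, and a trailing FIRST/AFTER position clause.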
self._match(TokenType.COLUMN) 5065 exists_column = self._parse_exists(not_=True) 5066 expression = self._parse_field_def() 5067 5068 if expression: 5069 expression.set("exists", exists_column) 5070 5071 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5072 if self._match_texts(("FIRST", "AFTER")): 5073 position = self._prev.text 5074 column_position = self.expression( 5075 exp.ColumnPosition, this=self._parse_column(), position=position 5076 ) 5077 expression.set("position", column_position) 5078 5079 return expression 5080 5081 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5082 drop = self._match(TokenType.DROP) and self._parse_drop() 5083 if drop and not isinstance(drop, exp.Command): 5084 drop.set("kind", drop.args.get("kind", "COLUMN")) 5085 return drop 5086 5087 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5088 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5089 return self.expression( 5090 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5091 ) 5092 5093 def _parse_add_constraint(self) -> exp.AddConstraint: 5094 this = None 5095 kind = self._prev.token_type 5096 5097 if kind == TokenType.CONSTRAINT: 5098 this = self._parse_id_var() 5099 5100 if self._match_text_seq("CHECK"): 5101 expression = self._parse_wrapped(self._parse_conjunction) 5102 enforced = self._match_text_seq("ENFORCED") 5103 5104 return self.expression( 5105 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5106 ) 5107 5108 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5109 expression = self._parse_foreign_key() 5110 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5111 expression = self._parse_primary_key() 5112 else: 5113 expression = None 5114 5115 return self.expression(exp.AddConstraint, this=this, expression=expression) 5116 5117 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5118 index = self._index - 1 5119 5120 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5121 return self._parse_csv(self._parse_add_constraint) 5122 5123 self._retreat(index) 5124 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 5125 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5126 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5127 5128 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5129 self._match(TokenType.COLUMN) 5130 column = self._parse_field(any_token=True) 5131 5132 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5133 return self.expression(exp.AlterColumn, this=column, drop=True) 5134 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5135 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5136 5137 self._match_text_seq("SET", "DATA") 5138 return self.expression( 5139 exp.AlterColumn, 5140 this=column, 5141 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5142 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5143 using=self._match(TokenType.USING) and self._parse_conjunction(), 5144 ) 5145 5146 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5147 index = self._index - 1 5148 5149 partition_exists = self._parse_exists() 5150 if self._match(TokenType.PARTITION, advance=False): 5151 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5152 5153 self._retreat(index) 5154 return 
self._parse_csv(self._parse_drop_column) 5155 5156 def _parse_alter_table_rename(self) -> exp.RenameTable: 5157 self._match_text_seq("TO") 5158 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5159 5160 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5161 start = self._prev 5162 5163 if not self._match(TokenType.TABLE): 5164 return self._parse_as_command(start) 5165 5166 exists = self._parse_exists() 5167 only = self._match_text_seq("ONLY") 5168 this = self._parse_table(schema=True) 5169 5170 if self._next: 5171 self._advance() 5172 5173 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5174 if parser: 5175 actions = ensure_list(parser(self)) 5176 5177 if not self._curr: 5178 return self.expression( 5179 exp.AlterTable, 5180 this=this, 5181 exists=exists, 5182 actions=actions, 5183 only=only, 5184 ) 5185 5186 return self._parse_as_command(start) 5187 5188 def _parse_merge(self) -> exp.Merge: 5189 self._match(TokenType.INTO) 5190 target = self._parse_table() 5191 5192 if target and self._match(TokenType.ALIAS, advance=False): 5193 target.set("alias", self._parse_table_alias()) 5194 5195 self._match(TokenType.USING) 5196 using = self._parse_table() 5197 5198 self._match(TokenType.ON) 5199 on = self._parse_conjunction() 5200 5201 return self.expression( 5202 exp.Merge, 5203 this=target, 5204 using=using, 5205 on=on, 5206 expressions=self._parse_when_matched(), 5207 ) 5208 5209 def _parse_when_matched(self) -> t.List[exp.When]: 5210 whens = [] 5211 5212 while self._match(TokenType.WHEN): 5213 matched = not self._match(TokenType.NOT) 5214 self._match_text_seq("MATCHED") 5215 source = ( 5216 False 5217 if self._match_text_seq("BY", "TARGET") 5218 else self._match_text_seq("BY", "SOURCE") 5219 ) 5220 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5221 5222 self._match(TokenType.THEN) 5223 5224 if self._match(TokenType.INSERT): 5225 _this = self._parse_star() 5226 if _this: 5227 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5228 else: 5229 then = self.expression( 5230 exp.Insert, 5231 this=self._parse_value(), 5232 expression=self._match(TokenType.VALUES) and self._parse_value(), 5233 ) 5234 elif self._match(TokenType.UPDATE): 5235 expressions = self._parse_star() 5236 if expressions: 5237 then = self.expression(exp.Update, expressions=expressions) 5238 else: 5239 then = self.expression( 5240 exp.Update, 5241 expressions=self._match(TokenType.SET) 5242 and self._parse_csv(self._parse_equality), 5243 ) 5244 elif self._match(TokenType.DELETE): 5245 then = self.expression(exp.Var, this=self._prev.text) 5246 else: 5247 then = None 5248 5249 whens.append( 5250 self.expression( 5251 exp.When, 5252 matched=matched, 5253 source=source, 5254 condition=condition, 5255 then=then, 5256 ) 5257 ) 5258 return whens 5259 5260 def _parse_show(self) -> t.Optional[exp.Expression]: 5261 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5262 if parser: 5263 return parser(self) 5264 return self._parse_as_command(self._prev) 5265 5266 def _parse_set_item_assignment( 5267 self, kind: t.Optional[str] = None 5268 ) -> t.Optional[exp.Expression]: 5269 index = self._index 5270 5271 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5272 return self._parse_set_transaction(global_=kind == "GLOBAL") 5273 5274 left = self._parse_primary() or self._parse_id_var() 5275 assignment_delimiter = self._match_texts(("=", "TO")) 5276 5277 if not left or 
(self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5278 self._retreat(index) 5279 return None 5280 5281 right = self._parse_statement() or self._parse_id_var() 5282 this = self.expression(exp.EQ, this=left, expression=right) 5283 5284 return self.expression(exp.SetItem, this=this, kind=kind) 5285 5286 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5287 self._match_text_seq("TRANSACTION") 5288 characteristics = self._parse_csv( 5289 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5290 ) 5291 return self.expression( 5292 exp.SetItem, 5293 expressions=characteristics, 5294 kind="TRANSACTION", 5295 **{"global": global_}, # type: ignore 5296 ) 5297 5298 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5299 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5300 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5301 5302 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5303 index = self._index 5304 set_ = self.expression( 5305 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5306 ) 5307 5308 if self._curr: 5309 self._retreat(index) 5310 return self._parse_as_command(self._prev) 5311 5312 return set_ 5313 5314 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5315 for option in options: 5316 if self._match_text_seq(*option.split(" ")): 5317 return exp.var(option) 5318 return None 5319 5320 def _parse_as_command(self, start: Token) -> exp.Command: 5321 while self._curr: 5322 self._advance() 5323 text = self._find_sql(start, self._prev) 5324 size = len(start.text) 5325 return exp.Command(this=text[:size], expression=text[size:]) 5326 5327 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5328 settings = [] 5329 5330 self._match_l_paren() 5331 kind = self._parse_id_var() 5332 5333 if self._match(TokenType.L_PAREN): 5334 while True: 5335 key = self._parse_id_var() 5336 value = self._parse_primary() 5337 5338 if not key and value is None: 5339 break 5340 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5341 self._match(TokenType.R_PAREN) 5342 5343 self._match_r_paren() 5344 5345 return self.expression( 5346 exp.DictProperty, 5347 this=this, 5348 kind=kind.this if kind else None, 5349 settings=settings, 5350 ) 5351 5352 def _parse_dict_range(self, this: str) -> exp.DictRange: 5353 self._match_l_paren() 5354 has_min = self._match_text_seq("MIN") 5355 if has_min: 5356 min = self._parse_var() or self._parse_primary() 5357 self._match_text_seq("MAX") 5358 max = self._parse_var() or self._parse_primary() 5359 else: 5360 max = self._parse_var() or self._parse_primary() 5361 min = exp.Literal.number(0) 5362 self._match_r_paren() 5363 return self.expression(exp.DictRange, this=this, min=min, max=max) 5364 5365 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5366 index = self._index 5367 expression = self._parse_column() 5368 if not self._match(TokenType.IN): 5369 self._retreat(index - 1) 5370 return None 5371 iterator = self._parse_column() 5372 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5373 return self.expression( 5374 exp.Comprehension, 5375 this=this, 5376 expression=expression, 5377 iterator=iterator, 5378 condition=condition, 5379 ) 5380 5381 def _find_parser( 5382 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5383 ) -> t.Optional[t.Callable]: 5384 if not self._curr: 
5385 return None 5386 5387 index = self._index 5388 this = [] 5389 while True: 5390 # The current token might be multiple words 5391 curr = self._curr.text.upper() 5392 key = curr.split(" ") 5393 this.append(curr) 5394 5395 self._advance() 5396 result, trie = in_trie(trie, key) 5397 if result == TrieResult.FAILED: 5398 break 5399 5400 if result == TrieResult.EXISTS: 5401 subparser = parsers[" ".join(this)] 5402 return subparser 5403 5404 self._retreat(index) 5405 return None 5406 5407 def _match(self, token_type, advance=True, expression=None): 5408 if not self._curr: 5409 return None 5410 5411 if self._curr.token_type == token_type: 5412 if advance: 5413 self._advance() 5414 self._add_comments(expression) 5415 return True 5416 5417 return None 5418 5419 def _match_set(self, types, advance=True): 5420 if not self._curr: 5421 return None 5422 5423 if self._curr.token_type in types: 5424 if advance: 5425 self._advance() 5426 return True 5427 5428 return None 5429 5430 def _match_pair(self, token_type_a, token_type_b, advance=True): 5431 if not self._curr or not self._next: 5432 return None 5433 5434 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5435 if advance: 5436 self._advance(2) 5437 return True 5438 5439 return None 5440 5441 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5442 if not self._match(TokenType.L_PAREN, expression=expression): 5443 self.raise_error("Expecting (") 5444 5445 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5446 if not self._match(TokenType.R_PAREN, expression=expression): 5447 self.raise_error("Expecting )") 5448 5449 def _match_texts(self, texts, advance=True): 5450 if self._curr and self._curr.text.upper() in texts: 5451 if advance: 5452 self._advance() 5453 return True 5454 return False 5455 5456 def _match_text_seq(self, *texts, advance=True): 5457 index = self._index 5458 for text in texts: 5459 if self._curr and self._curr.text.upper() == text: 5460 self._advance() 5461 else: 5462 self._retreat(index) 5463 return False 5464 5465 if not advance: 5466 self._retreat(index) 5467 5468 return True 5469 5470 @t.overload 5471 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5472 ... 5473 5474 @t.overload 5475 def _replace_columns_with_dots( 5476 self, this: t.Optional[exp.Expression] 5477 ) -> t.Optional[exp.Expression]: 5478 ... 
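# The two @t.overload stubs above exist only for static type checkers: they declare
# that a non-optional Expression argument yields a non-optional Expression, while an
# Optional input yields an Optional result. Overload stubs are discarded at runtime,
# so the untyped definition that follows is the one that actually executes.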
5479 5480 def _replace_columns_with_dots(self, this): 5481 if isinstance(this, exp.Dot): 5482 exp.replace_children(this, self._replace_columns_with_dots) 5483 elif isinstance(this, exp.Column): 5484 exp.replace_children(this, self._replace_columns_with_dots) 5485 table = this.args.get("table") 5486 this = ( 5487 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5488 ) 5489 5490 return this 5491 5492 def _replace_lambda( 5493 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5494 ) -> t.Optional[exp.Expression]: 5495 if not node: 5496 return node 5497 5498 for column in node.find_all(exp.Column): 5499 if column.parts[0].name in lambda_variables: 5500 dot_or_id = column.to_dot() if column.table else column.this 5501 parent = column.parent 5502 5503 while isinstance(parent, exp.Dot): 5504 if not isinstance(parent.parent, exp.Dot): 5505 parent.replace(dot_or_id) 5506 break 5507 parent = parent.parent 5508 else: 5509 if column is node: 5510 node = dot_or_id 5511 else: 5512 column.replace(dot_or_id) 5513 return node 5514 5515 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5516 return [ 5517 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5518 for value in values 5519 if value 5520 ]
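Before the per-symbol listings that follow, a minimal usage sketch may help orient: the Tokenizer turns a SQL string into tokens, and Parser.parse consumes them, yielding one syntax tree per semicolon-separated statement (as _parse shows, the token stream is chunked on semicolons). The SQL string below is illustrative only.

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; UPDATE t SET a = 1"
tokens = Tokenizer().tokenize(sql)

# parse() returns one expression tree per semicolon-separated statement.
for tree in Parser().parse(tokens, sql):
    print(type(tree).__name__)  # Select, then Update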
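The error_level argument controls what raise_error does with problems, as check_errors and raise_error in the Parser listing below show: IMMEDIATE raises at the first error, RAISE collects up to max_errors and raises once parsing finishes, WARN logs each error, and IGNORE skips expression validation. A small sketch, assuming the statement below really does fail to parse:

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

bad_sql = "SELECT * FROM"  # no table name; expected to fail

try:
    Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(bad_sql), bad_sql)
except ParseError as e:
    # Each recorded error is a dict carrying the message plus location/context,
    # matching the keyword arguments passed to ParseError.new in raise_error.
    for error in e.errors:
        print(error["description"], error["line"], error["col"])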
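Dialect-specific parsers typically customize behavior by subclassing Parser and overriding class-level tables such as FUNCTIONS rather than by overriding methods. A hypothetical sketch (TO_TEXT is an invented function name, not one sqlglot defines): the registered callable receives the already-parsed argument list and returns the expression to build instead of an anonymous function call.

from sqlglot import exp
from sqlglot.helper import seq_get
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

class MyParser(Parser):
    # Each FUNCTIONS entry maps an upper-case SQL function name to a callable
    # that takes the parsed argument list and returns an Expression.
    FUNCTIONS = {
        **Parser.FUNCTIONS,
        "TO_TEXT": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
    }

sql = "SELECT TO_TEXT(x) FROM t"
tree = MyParser().parse(Tokenizer().tokenize(sql), sql)[0]
print(tree.find(exp.Cast).sql())  # CAST(x AS TEXT)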
21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 )
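A quick illustration of the helper above with hand-built literals: an alternating key/value argument list is split into parallel key and value arrays inside a VarMap, while a single star argument would instead yield a StarMap.

from sqlglot import exp
from sqlglot.parser import parse_var_map

args = [
    exp.Literal.string("a"), exp.Literal.number(1),
    exp.Literal.string("b"), exp.Literal.number(2),
]

var_map = parse_var_map(args)
print(var_map.args["keys"].expressions)    # the key literals: 'a', 'b'
print(var_map.args["values"].expressions)  # the value literals: 1, 2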
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMP_S, 165 TokenType.TIMESTAMP_MS, 166 TokenType.TIMESTAMP_NS, 167 TokenType.TIMESTAMPTZ, 168 TokenType.TIMESTAMPLTZ, 169 TokenType.DATETIME, 170 TokenType.DATETIME64, 171 TokenType.DATE, 172 TokenType.INT4RANGE, 173 TokenType.INT4MULTIRANGE, 174 TokenType.INT8RANGE, 175 TokenType.INT8MULTIRANGE, 176 TokenType.NUMRANGE, 177 TokenType.NUMMULTIRANGE, 178 TokenType.TSRANGE, 179 TokenType.TSMULTIRANGE, 180 TokenType.TSTZRANGE, 181 TokenType.TSTZMULTIRANGE, 182 TokenType.DATERANGE, 183 TokenType.DATEMULTIRANGE, 184 TokenType.DECIMAL, 185 TokenType.UDECIMAL, 186 TokenType.BIGDECIMAL, 187 TokenType.UUID, 188 TokenType.GEOGRAPHY, 189 TokenType.GEOMETRY, 190 TokenType.HLLSKETCH, 191 TokenType.HSTORE, 192 
TokenType.PSEUDO_TYPE, 193 TokenType.SUPER, 194 TokenType.SERIAL, 195 TokenType.SMALLSERIAL, 196 TokenType.BIGSERIAL, 197 TokenType.XML, 198 TokenType.YEAR, 199 TokenType.UNIQUEIDENTIFIER, 200 TokenType.USERDEFINED, 201 TokenType.MONEY, 202 TokenType.SMALLMONEY, 203 TokenType.ROWVERSION, 204 TokenType.IMAGE, 205 TokenType.VARIANT, 206 TokenType.OBJECT, 207 TokenType.OBJECT_IDENTIFIER, 208 TokenType.INET, 209 TokenType.IPADDRESS, 210 TokenType.IPPREFIX, 211 TokenType.UNKNOWN, 212 TokenType.NULL, 213 *ENUM_TYPE_TOKENS, 214 *NESTED_TYPE_TOKENS, 215 } 216 217 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 218 TokenType.BIGINT: TokenType.UBIGINT, 219 TokenType.INT: TokenType.UINT, 220 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 221 TokenType.SMALLINT: TokenType.USMALLINT, 222 TokenType.TINYINT: TokenType.UTINYINT, 223 TokenType.DECIMAL: TokenType.UDECIMAL, 224 } 225 226 SUBQUERY_PREDICATES = { 227 TokenType.ANY: exp.Any, 228 TokenType.ALL: exp.All, 229 TokenType.EXISTS: exp.Exists, 230 TokenType.SOME: exp.Any, 231 } 232 233 RESERVED_KEYWORDS = { 234 *Tokenizer.SINGLE_TOKENS.values(), 235 TokenType.SELECT, 236 } 237 238 DB_CREATABLES = { 239 TokenType.DATABASE, 240 TokenType.SCHEMA, 241 TokenType.TABLE, 242 TokenType.VIEW, 243 TokenType.MODEL, 244 TokenType.DICTIONARY, 245 } 246 247 CREATABLES = { 248 TokenType.COLUMN, 249 TokenType.CONSTRAINT, 250 TokenType.FUNCTION, 251 TokenType.INDEX, 252 TokenType.PROCEDURE, 253 TokenType.FOREIGN_KEY, 254 *DB_CREATABLES, 255 } 256 257 # Tokens that can represent identifiers 258 ID_VAR_TOKENS = { 259 TokenType.VAR, 260 TokenType.ANTI, 261 TokenType.APPLY, 262 TokenType.ASC, 263 TokenType.AUTO_INCREMENT, 264 TokenType.BEGIN, 265 TokenType.CACHE, 266 TokenType.CASE, 267 TokenType.COLLATE, 268 TokenType.COMMAND, 269 TokenType.COMMENT, 270 TokenType.COMMIT, 271 TokenType.CONSTRAINT, 272 TokenType.DEFAULT, 273 TokenType.DELETE, 274 TokenType.DESC, 275 TokenType.DESCRIBE, 276 TokenType.DICTIONARY, 277 TokenType.DIV, 278 TokenType.END, 279 TokenType.EXECUTE, 280 TokenType.ESCAPE, 281 TokenType.FALSE, 282 TokenType.FIRST, 283 TokenType.FILTER, 284 TokenType.FORMAT, 285 TokenType.FULL, 286 TokenType.IS, 287 TokenType.ISNULL, 288 TokenType.INTERVAL, 289 TokenType.KEEP, 290 TokenType.KILL, 291 TokenType.LEFT, 292 TokenType.LOAD, 293 TokenType.MERGE, 294 TokenType.NATURAL, 295 TokenType.NEXT, 296 TokenType.OFFSET, 297 TokenType.ORDINALITY, 298 TokenType.OVERLAPS, 299 TokenType.OVERWRITE, 300 TokenType.PARTITION, 301 TokenType.PERCENT, 302 TokenType.PIVOT, 303 TokenType.PRAGMA, 304 TokenType.RANGE, 305 TokenType.RECURSIVE, 306 TokenType.REFERENCES, 307 TokenType.REFRESH, 308 TokenType.RIGHT, 309 TokenType.ROW, 310 TokenType.ROWS, 311 TokenType.SEMI, 312 TokenType.SET, 313 TokenType.SETTINGS, 314 TokenType.SHOW, 315 TokenType.TEMPORARY, 316 TokenType.TOP, 317 TokenType.TRUE, 318 TokenType.UNIQUE, 319 TokenType.UNPIVOT, 320 TokenType.UPDATE, 321 TokenType.USE, 322 TokenType.VOLATILE, 323 TokenType.WINDOW, 324 *CREATABLES, 325 *SUBQUERY_PREDICATES, 326 *TYPE_TOKENS, 327 *NO_PAREN_FUNCTIONS, 328 } 329 330 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 331 332 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 333 TokenType.ANTI, 334 TokenType.APPLY, 335 TokenType.ASOF, 336 TokenType.FULL, 337 TokenType.LEFT, 338 TokenType.LOCK, 339 TokenType.NATURAL, 340 TokenType.OFFSET, 341 TokenType.RIGHT, 342 TokenType.SEMI, 343 TokenType.WINDOW, 344 } 345 346 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 347 348 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 349 350 TRIM_TYPES = 
{"LEADING", "TRAILING", "BOTH"} 351 352 FUNC_TOKENS = { 353 TokenType.COLLATE, 354 TokenType.COMMAND, 355 TokenType.CURRENT_DATE, 356 TokenType.CURRENT_DATETIME, 357 TokenType.CURRENT_TIMESTAMP, 358 TokenType.CURRENT_TIME, 359 TokenType.CURRENT_USER, 360 TokenType.FILTER, 361 TokenType.FIRST, 362 TokenType.FORMAT, 363 TokenType.GLOB, 364 TokenType.IDENTIFIER, 365 TokenType.INDEX, 366 TokenType.ISNULL, 367 TokenType.ILIKE, 368 TokenType.INSERT, 369 TokenType.LIKE, 370 TokenType.MERGE, 371 TokenType.OFFSET, 372 TokenType.PRIMARY_KEY, 373 TokenType.RANGE, 374 TokenType.REPLACE, 375 TokenType.RLIKE, 376 TokenType.ROW, 377 TokenType.UNNEST, 378 TokenType.VAR, 379 TokenType.LEFT, 380 TokenType.RIGHT, 381 TokenType.DATE, 382 TokenType.DATETIME, 383 TokenType.TABLE, 384 TokenType.TIMESTAMP, 385 TokenType.TIMESTAMPTZ, 386 TokenType.WINDOW, 387 TokenType.XOR, 388 *TYPE_TOKENS, 389 *SUBQUERY_PREDICATES, 390 } 391 392 CONJUNCTION = { 393 TokenType.AND: exp.And, 394 TokenType.OR: exp.Or, 395 } 396 397 EQUALITY = { 398 TokenType.COLON_EQ: exp.PropertyEQ, 399 TokenType.EQ: exp.EQ, 400 TokenType.NEQ: exp.NEQ, 401 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 402 } 403 404 COMPARISON = { 405 TokenType.GT: exp.GT, 406 TokenType.GTE: exp.GTE, 407 TokenType.LT: exp.LT, 408 TokenType.LTE: exp.LTE, 409 } 410 411 BITWISE = { 412 TokenType.AMP: exp.BitwiseAnd, 413 TokenType.CARET: exp.BitwiseXor, 414 TokenType.PIPE: exp.BitwiseOr, 415 TokenType.DPIPE: exp.DPipe, 416 } 417 418 TERM = { 419 TokenType.DASH: exp.Sub, 420 TokenType.PLUS: exp.Add, 421 TokenType.MOD: exp.Mod, 422 TokenType.COLLATE: exp.Collate, 423 } 424 425 FACTOR = { 426 TokenType.DIV: exp.IntDiv, 427 TokenType.LR_ARROW: exp.Distance, 428 TokenType.SLASH: exp.Div, 429 TokenType.STAR: exp.Mul, 430 } 431 432 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 433 434 TIMES = { 435 TokenType.TIME, 436 TokenType.TIMETZ, 437 } 438 439 TIMESTAMPS = { 440 TokenType.TIMESTAMP, 441 TokenType.TIMESTAMPTZ, 442 TokenType.TIMESTAMPLTZ, 443 *TIMES, 444 } 445 446 SET_OPERATIONS = { 447 TokenType.UNION, 448 TokenType.INTERSECT, 449 TokenType.EXCEPT, 450 } 451 452 JOIN_METHODS = { 453 TokenType.NATURAL, 454 TokenType.ASOF, 455 } 456 457 JOIN_SIDES = { 458 TokenType.LEFT, 459 TokenType.RIGHT, 460 TokenType.FULL, 461 } 462 463 JOIN_KINDS = { 464 TokenType.INNER, 465 TokenType.OUTER, 466 TokenType.CROSS, 467 TokenType.SEMI, 468 TokenType.ANTI, 469 } 470 471 JOIN_HINTS: t.Set[str] = set() 472 473 LAMBDAS = { 474 TokenType.ARROW: lambda self, expressions: self.expression( 475 exp.Lambda, 476 this=self._replace_lambda( 477 self._parse_conjunction(), 478 {node.name for node in expressions}, 479 ), 480 expressions=expressions, 481 ), 482 TokenType.FARROW: lambda self, expressions: self.expression( 483 exp.Kwarg, 484 this=exp.var(expressions[0].name), 485 expression=self._parse_conjunction(), 486 ), 487 } 488 489 COLUMN_OPERATORS = { 490 TokenType.DOT: None, 491 TokenType.DCOLON: lambda self, this, to: self.expression( 492 exp.Cast if self.STRICT_CAST else exp.TryCast, 493 this=this, 494 to=to, 495 ), 496 TokenType.ARROW: lambda self, this, path: self.expression( 497 exp.JSONExtract, 498 this=this, 499 expression=path, 500 ), 501 TokenType.DARROW: lambda self, this, path: self.expression( 502 exp.JSONExtractScalar, 503 this=this, 504 expression=path, 505 ), 506 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 507 exp.JSONBExtract, 508 this=this, 509 expression=path, 510 ), 511 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 512 
exp.JSONBExtractScalar, 513 this=this, 514 expression=path, 515 ), 516 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 517 exp.JSONBContains, 518 this=this, 519 expression=key, 520 ), 521 } 522 523 EXPRESSION_PARSERS = { 524 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 525 exp.Column: lambda self: self._parse_column(), 526 exp.Condition: lambda self: self._parse_conjunction(), 527 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 528 exp.Expression: lambda self: self._parse_statement(), 529 exp.From: lambda self: self._parse_from(), 530 exp.Group: lambda self: self._parse_group(), 531 exp.Having: lambda self: self._parse_having(), 532 exp.Identifier: lambda self: self._parse_id_var(), 533 exp.Join: lambda self: self._parse_join(), 534 exp.Lambda: lambda self: self._parse_lambda(), 535 exp.Lateral: lambda self: self._parse_lateral(), 536 exp.Limit: lambda self: self._parse_limit(), 537 exp.Offset: lambda self: self._parse_offset(), 538 exp.Order: lambda self: self._parse_order(), 539 exp.Ordered: lambda self: self._parse_ordered(), 540 exp.Properties: lambda self: self._parse_properties(), 541 exp.Qualify: lambda self: self._parse_qualify(), 542 exp.Returning: lambda self: self._parse_returning(), 543 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 544 exp.Table: lambda self: self._parse_table_parts(), 545 exp.TableAlias: lambda self: self._parse_table_alias(), 546 exp.Where: lambda self: self._parse_where(), 547 exp.Window: lambda self: self._parse_named_window(), 548 exp.With: lambda self: self._parse_with(), 549 "JOIN_TYPE": lambda self: self._parse_join_parts(), 550 } 551 552 STATEMENT_PARSERS = { 553 TokenType.ALTER: lambda self: self._parse_alter(), 554 TokenType.BEGIN: lambda self: self._parse_transaction(), 555 TokenType.CACHE: lambda self: self._parse_cache(), 556 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 557 TokenType.COMMENT: lambda self: self._parse_comment(), 558 TokenType.CREATE: lambda self: self._parse_create(), 559 TokenType.DELETE: lambda self: self._parse_delete(), 560 TokenType.DESC: lambda self: self._parse_describe(), 561 TokenType.DESCRIBE: lambda self: self._parse_describe(), 562 TokenType.DROP: lambda self: self._parse_drop(), 563 TokenType.INSERT: lambda self: self._parse_insert(), 564 TokenType.KILL: lambda self: self._parse_kill(), 565 TokenType.LOAD: lambda self: self._parse_load(), 566 TokenType.MERGE: lambda self: self._parse_merge(), 567 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 568 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 569 TokenType.REFRESH: lambda self: self._parse_refresh(), 570 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 571 TokenType.SET: lambda self: self._parse_set(), 572 TokenType.UNCACHE: lambda self: self._parse_uncache(), 573 TokenType.UPDATE: lambda self: self._parse_update(), 574 TokenType.USE: lambda self: self.expression( 575 exp.Use, 576 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 577 and exp.var(self._prev.text), 578 this=self._parse_table(schema=False), 579 ), 580 } 581 582 UNARY_PARSERS = { 583 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 584 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 585 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 586 TokenType.DASH: lambda self: self.expression(exp.Neg, 
this=self._parse_unary()), 587 } 588 589 PRIMARY_PARSERS = { 590 TokenType.STRING: lambda self, token: self.expression( 591 exp.Literal, this=token.text, is_string=True 592 ), 593 TokenType.NUMBER: lambda self, token: self.expression( 594 exp.Literal, this=token.text, is_string=False 595 ), 596 TokenType.STAR: lambda self, _: self.expression( 597 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 598 ), 599 TokenType.NULL: lambda self, _: self.expression(exp.Null), 600 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 601 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 602 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 603 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 604 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 605 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 606 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 607 exp.National, this=token.text 608 ), 609 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 610 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 611 exp.RawString, this=token.text 612 ), 613 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 614 } 615 616 PLACEHOLDER_PARSERS = { 617 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 618 TokenType.PARAMETER: lambda self: self._parse_parameter(), 619 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 620 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 621 else None, 622 } 623 624 RANGE_PARSERS = { 625 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 626 TokenType.GLOB: binary_range_parser(exp.Glob), 627 TokenType.ILIKE: binary_range_parser(exp.ILike), 628 TokenType.IN: lambda self, this: self._parse_in(this), 629 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 630 TokenType.IS: lambda self, this: self._parse_is(this), 631 TokenType.LIKE: binary_range_parser(exp.Like), 632 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 633 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 634 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 635 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 636 } 637 638 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 639 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 640 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 641 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 642 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 643 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 644 "CHECKSUM": lambda self: self._parse_checksum(), 645 "CLUSTER BY": lambda self: self._parse_cluster(), 646 "CLUSTERED": lambda self: self._parse_clustered_by(), 647 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 648 exp.CollateProperty, **kwargs 649 ), 650 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 651 "COPY": lambda self: self._parse_copy_property(), 652 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 653 "DEFINER": lambda self: self._parse_definer(), 654 "DETERMINISTIC": lambda self: self.expression( 655 exp.StabilityProperty, 
this=exp.Literal.string("IMMUTABLE") 656 ), 657 "DISTKEY": lambda self: self._parse_distkey(), 658 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 659 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 660 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 661 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 662 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 663 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 664 "FREESPACE": lambda self: self._parse_freespace(), 665 "HEAP": lambda self: self.expression(exp.HeapProperty), 666 "IMMUTABLE": lambda self: self.expression( 667 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 668 ), 669 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 670 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 671 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 672 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 673 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 674 "LIKE": lambda self: self._parse_create_like(), 675 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 676 "LOCK": lambda self: self._parse_locking(), 677 "LOCKING": lambda self: self._parse_locking(), 678 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 679 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 680 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 681 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 682 "NO": lambda self: self._parse_no_property(), 683 "ON": lambda self: self._parse_on_property(), 684 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 685 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 686 "PARTITION": lambda self: self._parse_partitioned_of(), 687 "PARTITION BY": lambda self: self._parse_partitioned_by(), 688 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 689 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 690 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 691 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 692 "REMOTE": lambda self: self._parse_remote_with_connection(), 693 "RETURNS": lambda self: self._parse_returns(), 694 "ROW": lambda self: self._parse_row(), 695 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 696 "SAMPLE": lambda self: self.expression( 697 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 698 ), 699 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 700 "SETTINGS": lambda self: self.expression( 701 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 702 ), 703 "SORTKEY": lambda self: self._parse_sortkey(), 704 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 705 "STABLE": lambda self: self.expression( 706 exp.StabilityProperty, this=exp.Literal.string("STABLE") 707 ), 708 "STORED": lambda self: self._parse_stored(), 709 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 710 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 711 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 712 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 713 "TO": lambda self: 
self._parse_to_table(), 714 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 715 "TRANSFORM": lambda self: self.expression( 716 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 717 ), 718 "TTL": lambda self: self._parse_ttl(), 719 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 720 "VOLATILE": lambda self: self._parse_volatile_property(), 721 "WITH": lambda self: self._parse_with_property(), 722 } 723 724 CONSTRAINT_PARSERS = { 725 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 726 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 727 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 728 "CHARACTER SET": lambda self: self.expression( 729 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 730 ), 731 "CHECK": lambda self: self.expression( 732 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 733 ), 734 "COLLATE": lambda self: self.expression( 735 exp.CollateColumnConstraint, this=self._parse_var() 736 ), 737 "COMMENT": lambda self: self.expression( 738 exp.CommentColumnConstraint, this=self._parse_string() 739 ), 740 "COMPRESS": lambda self: self._parse_compress(), 741 "CLUSTERED": lambda self: self.expression( 742 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 743 ), 744 "NONCLUSTERED": lambda self: self.expression( 745 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 746 ), 747 "DEFAULT": lambda self: self.expression( 748 exp.DefaultColumnConstraint, this=self._parse_bitwise() 749 ), 750 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 751 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 752 "FORMAT": lambda self: self.expression( 753 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 754 ), 755 "GENERATED": lambda self: self._parse_generated_as_identity(), 756 "IDENTITY": lambda self: self._parse_auto_increment(), 757 "INLINE": lambda self: self._parse_inline(), 758 "LIKE": lambda self: self._parse_create_like(), 759 "NOT": lambda self: self._parse_not_constraint(), 760 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 761 "ON": lambda self: ( 762 self._match(TokenType.UPDATE) 763 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 764 ) 765 or self.expression(exp.OnProperty, this=self._parse_id_var()), 766 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 767 "PERIOD": lambda self: self._parse_period_for_system_time(), 768 "PRIMARY KEY": lambda self: self._parse_primary_key(), 769 "REFERENCES": lambda self: self._parse_references(match=False), 770 "TITLE": lambda self: self.expression( 771 exp.TitleColumnConstraint, this=self._parse_var_or_string() 772 ), 773 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 774 "UNIQUE": lambda self: self._parse_unique(), 775 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 776 "WITH": lambda self: self.expression( 777 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 778 ), 779 } 780 781 ALTER_PARSERS = { 782 "ADD": lambda self: self._parse_alter_table_add(), 783 "ALTER": lambda self: self._parse_alter_table_alter(), 784 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 785 "DROP": lambda self: 
self._parse_alter_table_drop(), 786 "RENAME": lambda self: self._parse_alter_table_rename(), 787 } 788 789 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 790 791 NO_PAREN_FUNCTION_PARSERS = { 792 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 793 "CASE": lambda self: self._parse_case(), 794 "IF": lambda self: self._parse_if(), 795 "NEXT": lambda self: self._parse_next_value_for(), 796 } 797 798 INVALID_FUNC_NAME_TOKENS = { 799 TokenType.IDENTIFIER, 800 TokenType.STRING, 801 } 802 803 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 804 805 FUNCTION_PARSERS = { 806 "ANY_VALUE": lambda self: self._parse_any_value(), 807 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 808 "CONCAT": lambda self: self._parse_concat(), 809 "CONCAT_WS": lambda self: self._parse_concat_ws(), 810 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 811 "DECODE": lambda self: self._parse_decode(), 812 "EXTRACT": lambda self: self._parse_extract(), 813 "JSON_OBJECT": lambda self: self._parse_json_object(), 814 "JSON_TABLE": lambda self: self._parse_json_table(), 815 "LOG": lambda self: self._parse_logarithm(), 816 "MATCH": lambda self: self._parse_match_against(), 817 "OPENJSON": lambda self: self._parse_open_json(), 818 "POSITION": lambda self: self._parse_position(), 819 "PREDICT": lambda self: self._parse_predict(), 820 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 821 "STRING_AGG": lambda self: self._parse_string_agg(), 822 "SUBSTRING": lambda self: self._parse_substring(), 823 "TRIM": lambda self: self._parse_trim(), 824 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 825 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 826 } 827 828 QUERY_MODIFIER_PARSERS = { 829 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 830 TokenType.WHERE: lambda self: ("where", self._parse_where()), 831 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 832 TokenType.HAVING: lambda self: ("having", self._parse_having()), 833 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 834 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 835 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 836 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 837 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 838 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 839 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 840 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 841 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 842 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 843 TokenType.CLUSTER_BY: lambda self: ( 844 "cluster", 845 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 846 ), 847 TokenType.DISTRIBUTE_BY: lambda self: ( 848 "distribute", 849 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 850 ), 851 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 852 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 853 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 854 } 855 856 SET_PARSERS = { 857 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 858 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 859 "SESSION": lambda self: 
self._parse_set_item_assignment("SESSION"), 860 "TRANSACTION": lambda self: self._parse_set_transaction(), 861 } 862 863 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 864 865 TYPE_LITERAL_PARSERS = { 866 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 867 } 868 869 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 870 871 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 872 873 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 874 875 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 876 TRANSACTION_CHARACTERISTICS = { 877 "ISOLATION LEVEL REPEATABLE READ", 878 "ISOLATION LEVEL READ COMMITTED", 879 "ISOLATION LEVEL READ UNCOMMITTED", 880 "ISOLATION LEVEL SERIALIZABLE", 881 "READ WRITE", 882 "READ ONLY", 883 } 884 885 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 886 887 CLONE_KEYWORDS = {"CLONE", "COPY"} 888 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 889 890 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 891 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 892 893 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 894 895 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 896 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 897 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 898 899 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 900 901 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 902 903 DISTINCT_TOKENS = {TokenType.DISTINCT} 904 905 NULL_TOKENS = {TokenType.NULL} 906 907 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 908 909 STRICT_CAST = True 910 911 # A NULL arg in CONCAT yields NULL by default 912 CONCAT_NULL_OUTPUTS_STRING = False 913 914 PREFIXED_PIVOT_COLUMNS = False 915 IDENTIFY_PIVOT_STRINGS = False 916 917 LOG_BASE_FIRST = True 918 LOG_DEFAULTS_TO_LN = False 919 920 # Whether or not ADD is present for each column added by ALTER TABLE 921 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 922 923 # Whether or not the table sample clause expects CSV syntax 924 TABLESAMPLE_CSV = False 925 926 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 927 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 928 929 # Whether the TRIM function expects the characters to trim as its first argument 930 TRIM_PATTERN_FIRST = False 931 932 # Whether the behavior of a / b depends on the types of a and b. 933 # False means a / b is always float division. 934 # True means a / b is integer division if both a and b are integers. 935 TYPED_DIVISION = False 936 937 # False means 1 / 0 throws an error. 938 # True means 1 / 0 returns null. 
939 SAFE_DIVISION = False 940 941 __slots__ = ( 942 "error_level", 943 "error_message_context", 944 "max_errors", 945 "sql", 946 "errors", 947 "_tokens", 948 "_index", 949 "_curr", 950 "_next", 951 "_prev", 952 "_prev_comments", 953 "_tokenizer", 954 ) 955 956 # Autofilled 957 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 958 INDEX_OFFSET: int = 0 959 UNNEST_COLUMN_ONLY: bool = False 960 ALIAS_POST_TABLESAMPLE: bool = False 961 STRICT_STRING_CONCAT = False 962 SUPPORTS_USER_DEFINED_TYPES = True 963 NORMALIZE_FUNCTIONS = "upper" 964 NULL_ORDERING: str = "nulls_are_small" 965 SHOW_TRIE: t.Dict = {} 966 SET_TRIE: t.Dict = {} 967 FORMAT_MAPPING: t.Dict[str, str] = {} 968 FORMAT_TRIE: t.Dict = {} 969 TIME_MAPPING: t.Dict[str, str] = {} 970 TIME_TRIE: t.Dict = {} 971 972 def __init__( 973 self, 974 error_level: t.Optional[ErrorLevel] = None, 975 error_message_context: int = 100, 976 max_errors: int = 3, 977 ): 978 self.error_level = error_level or ErrorLevel.IMMEDIATE 979 self.error_message_context = error_message_context 980 self.max_errors = max_errors 981 self._tokenizer = self.TOKENIZER_CLASS() 982 self.reset() 983 984 def reset(self): 985 self.sql = "" 986 self.errors = [] 987 self._tokens = [] 988 self._index = 0 989 self._curr = None 990 self._next = None 991 self._prev = None 992 self._prev_comments = None 993 994 def parse( 995 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 996 ) -> t.List[t.Optional[exp.Expression]]: 997 """ 998 Parses a list of tokens and returns a list of syntax trees, one tree 999 per parsed SQL statement. 1000 1001 Args: 1002 raw_tokens: The list of tokens. 1003 sql: The original SQL string, used to produce helpful debug messages. 1004 1005 Returns: 1006 The list of the produced syntax trees. 1007 """ 1008 return self._parse( 1009 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1010 ) 1011 1012 def parse_into( 1013 self, 1014 expression_types: exp.IntoType, 1015 raw_tokens: t.List[Token], 1016 sql: t.Optional[str] = None, 1017 ) -> t.List[t.Optional[exp.Expression]]: 1018 """ 1019 Parses a list of tokens into a given Expression type. If a collection of Expression 1020 types is given instead, this method will try to parse the token list into each one 1021 of them, stopping at the first for which the parsing succeeds. 1022 1023 Args: 1024 expression_types: The expression type(s) to try and parse the token list into. 1025 raw_tokens: The list of tokens. 1026 sql: The original SQL string, used to produce helpful debug messages. 1027 1028 Returns: 1029 The target Expression. 
1030 """ 1031 errors = [] 1032 for expression_type in ensure_list(expression_types): 1033 parser = self.EXPRESSION_PARSERS.get(expression_type) 1034 if not parser: 1035 raise TypeError(f"No parser registered for {expression_type}") 1036 1037 try: 1038 return self._parse(parser, raw_tokens, sql) 1039 except ParseError as e: 1040 e.errors[0]["into_expression"] = expression_type 1041 errors.append(e) 1042 1043 raise ParseError( 1044 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1045 errors=merge_errors(errors), 1046 ) from errors[-1] 1047 1048 def _parse( 1049 self, 1050 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1051 raw_tokens: t.List[Token], 1052 sql: t.Optional[str] = None, 1053 ) -> t.List[t.Optional[exp.Expression]]: 1054 self.reset() 1055 self.sql = sql or "" 1056 1057 total = len(raw_tokens) 1058 chunks: t.List[t.List[Token]] = [[]] 1059 1060 for i, token in enumerate(raw_tokens): 1061 if token.token_type == TokenType.SEMICOLON: 1062 if i < total - 1: 1063 chunks.append([]) 1064 else: 1065 chunks[-1].append(token) 1066 1067 expressions = [] 1068 1069 for tokens in chunks: 1070 self._index = -1 1071 self._tokens = tokens 1072 self._advance() 1073 1074 expressions.append(parse_method(self)) 1075 1076 if self._index < len(self._tokens): 1077 self.raise_error("Invalid expression / Unexpected token") 1078 1079 self.check_errors() 1080 1081 return expressions 1082 1083 def check_errors(self) -> None: 1084 """Logs or raises any found errors, depending on the chosen error level setting.""" 1085 if self.error_level == ErrorLevel.WARN: 1086 for error in self.errors: 1087 logger.error(str(error)) 1088 elif self.error_level == ErrorLevel.RAISE and self.errors: 1089 raise ParseError( 1090 concat_messages(self.errors, self.max_errors), 1091 errors=merge_errors(self.errors), 1092 ) 1093 1094 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1095 """ 1096 Appends an error in the list of recorded errors or raises it, depending on the chosen 1097 error level setting. 1098 """ 1099 token = token or self._curr or self._prev or Token.string("") 1100 start = token.start 1101 end = token.end + 1 1102 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1103 highlight = self.sql[start:end] 1104 end_context = self.sql[end : end + self.error_message_context] 1105 1106 error = ParseError.new( 1107 f"{message}. Line {token.line}, Col: {token.col}.\n" 1108 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1109 description=message, 1110 line=token.line, 1111 col=token.col, 1112 start_context=start_context, 1113 highlight=highlight, 1114 end_context=end_context, 1115 ) 1116 1117 if self.error_level == ErrorLevel.IMMEDIATE: 1118 raise error 1119 1120 self.errors.append(error) 1121 1122 def expression( 1123 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1124 ) -> E: 1125 """ 1126 Creates a new, validated Expression. 1127 1128 Args: 1129 exp_class: The expression class to instantiate. 1130 comments: An optional list of comments to attach to the expression. 1131 kwargs: The arguments to set for the expression along with their respective values. 1132 1133 Returns: 1134 The target expression. 
1135 """ 1136 instance = exp_class(**kwargs) 1137 instance.add_comments(comments) if comments else self._add_comments(instance) 1138 return self.validate_expression(instance) 1139 1140 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1141 if expression and self._prev_comments: 1142 expression.add_comments(self._prev_comments) 1143 self._prev_comments = None 1144 1145 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1146 """ 1147 Validates an Expression, making sure that all its mandatory arguments are set. 1148 1149 Args: 1150 expression: The expression to validate. 1151 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1152 1153 Returns: 1154 The validated expression. 1155 """ 1156 if self.error_level != ErrorLevel.IGNORE: 1157 for error_message in expression.error_messages(args): 1158 self.raise_error(error_message) 1159 1160 return expression 1161 1162 def _find_sql(self, start: Token, end: Token) -> str: 1163 return self.sql[start.start : end.end + 1] 1164 1165 def _advance(self, times: int = 1) -> None: 1166 self._index += times 1167 self._curr = seq_get(self._tokens, self._index) 1168 self._next = seq_get(self._tokens, self._index + 1) 1169 1170 if self._index > 0: 1171 self._prev = self._tokens[self._index - 1] 1172 self._prev_comments = self._prev.comments 1173 else: 1174 self._prev = None 1175 self._prev_comments = None 1176 1177 def _retreat(self, index: int) -> None: 1178 if index != self._index: 1179 self._advance(index - self._index) 1180 1181 def _parse_command(self) -> exp.Command: 1182 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1183 1184 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1185 start = self._prev 1186 exists = self._parse_exists() if allow_exists else None 1187 1188 self._match(TokenType.ON) 1189 1190 kind = self._match_set(self.CREATABLES) and self._prev 1191 if not kind: 1192 return self._parse_as_command(start) 1193 1194 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1195 this = self._parse_user_defined_function(kind=kind.token_type) 1196 elif kind.token_type == TokenType.TABLE: 1197 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1198 elif kind.token_type == TokenType.COLUMN: 1199 this = self._parse_column() 1200 else: 1201 this = self._parse_id_var() 1202 1203 self._match(TokenType.IS) 1204 1205 return self.expression( 1206 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1207 ) 1208 1209 def _parse_to_table( 1210 self, 1211 ) -> exp.ToTableProperty: 1212 table = self._parse_table_parts(schema=True) 1213 return self.expression(exp.ToTableProperty, this=table) 1214 1215 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1216 def _parse_ttl(self) -> exp.Expression: 1217 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1218 this = self._parse_bitwise() 1219 1220 if self._match_text_seq("DELETE"): 1221 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1222 if self._match_text_seq("RECOMPRESS"): 1223 return self.expression( 1224 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1225 ) 1226 if self._match_text_seq("TO", "DISK"): 1227 return self.expression( 1228 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1229 ) 1230 if self._match_text_seq("TO", "VOLUME"): 1231 return self.expression( 1232 
exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1233 ) 1234 1235 return this 1236 1237 expressions = self._parse_csv(_parse_ttl_action) 1238 where = self._parse_where() 1239 group = self._parse_group() 1240 1241 aggregates = None 1242 if group and self._match(TokenType.SET): 1243 aggregates = self._parse_csv(self._parse_set_item) 1244 1245 return self.expression( 1246 exp.MergeTreeTTL, 1247 expressions=expressions, 1248 where=where, 1249 group=group, 1250 aggregates=aggregates, 1251 ) 1252 1253 def _parse_statement(self) -> t.Optional[exp.Expression]: 1254 if self._curr is None: 1255 return None 1256 1257 if self._match_set(self.STATEMENT_PARSERS): 1258 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1259 1260 if self._match_set(Tokenizer.COMMANDS): 1261 return self._parse_command() 1262 1263 expression = self._parse_expression() 1264 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1265 return self._parse_query_modifiers(expression) 1266 1267 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1268 start = self._prev 1269 temporary = self._match(TokenType.TEMPORARY) 1270 materialized = self._match_text_seq("MATERIALIZED") 1271 1272 kind = self._match_set(self.CREATABLES) and self._prev.text 1273 if not kind: 1274 return self._parse_as_command(start) 1275 1276 return self.expression( 1277 exp.Drop, 1278 comments=start.comments, 1279 exists=exists or self._parse_exists(), 1280 this=self._parse_table(schema=True), 1281 kind=kind, 1282 temporary=temporary, 1283 materialized=materialized, 1284 cascade=self._match_text_seq("CASCADE"), 1285 constraints=self._match_text_seq("CONSTRAINTS"), 1286 purge=self._match_text_seq("PURGE"), 1287 ) 1288 1289 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1290 return ( 1291 self._match_text_seq("IF") 1292 and (not not_ or self._match(TokenType.NOT)) 1293 and self._match(TokenType.EXISTS) 1294 ) 1295 1296 def _parse_create(self) -> exp.Create | exp.Command: 1297 # Note: this can't be None because we've matched a statement parser 1298 start = self._prev 1299 comments = self._prev_comments 1300 1301 replace = start.text.upper() == "REPLACE" or self._match_pair( 1302 TokenType.OR, TokenType.REPLACE 1303 ) 1304 unique = self._match(TokenType.UNIQUE) 1305 1306 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1307 self._advance() 1308 1309 properties = None 1310 create_token = self._match_set(self.CREATABLES) and self._prev 1311 1312 if not create_token: 1313 # exp.Properties.Location.POST_CREATE 1314 properties = self._parse_properties() 1315 create_token = self._match_set(self.CREATABLES) and self._prev 1316 1317 if not properties or not create_token: 1318 return self._parse_as_command(start) 1319 1320 exists = self._parse_exists(not_=True) 1321 this = None 1322 expression: t.Optional[exp.Expression] = None 1323 indexes = None 1324 no_schema_binding = None 1325 begin = None 1326 end = None 1327 clone = None 1328 1329 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1330 nonlocal properties 1331 if properties and temp_props: 1332 properties.expressions.extend(temp_props.expressions) 1333 elif temp_props: 1334 properties = temp_props 1335 1336 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1337 this = self._parse_user_defined_function(kind=create_token.token_type) 1338 1339 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1340 
extend_props(self._parse_properties()) 1341 1342 self._match(TokenType.ALIAS) 1343 1344 if self._match(TokenType.COMMAND): 1345 expression = self._parse_as_command(self._prev) 1346 else: 1347 begin = self._match(TokenType.BEGIN) 1348 return_ = self._match_text_seq("RETURN") 1349 1350 if self._match(TokenType.STRING, advance=False): 1351 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1352 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1353 expression = self._parse_string() 1354 extend_props(self._parse_properties()) 1355 else: 1356 expression = self._parse_statement() 1357 1358 end = self._match_text_seq("END") 1359 1360 if return_: 1361 expression = self.expression(exp.Return, this=expression) 1362 elif create_token.token_type == TokenType.INDEX: 1363 this = self._parse_index(index=self._parse_id_var()) 1364 elif create_token.token_type in self.DB_CREATABLES: 1365 table_parts = self._parse_table_parts(schema=True) 1366 1367 # exp.Properties.Location.POST_NAME 1368 self._match(TokenType.COMMA) 1369 extend_props(self._parse_properties(before=True)) 1370 1371 this = self._parse_schema(this=table_parts) 1372 1373 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1374 extend_props(self._parse_properties()) 1375 1376 self._match(TokenType.ALIAS) 1377 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1378 # exp.Properties.Location.POST_ALIAS 1379 extend_props(self._parse_properties()) 1380 1381 expression = self._parse_ddl_select() 1382 1383 if create_token.token_type == TokenType.TABLE: 1384 # exp.Properties.Location.POST_EXPRESSION 1385 extend_props(self._parse_properties()) 1386 1387 indexes = [] 1388 while True: 1389 index = self._parse_index() 1390 1391 # exp.Properties.Location.POST_INDEX 1392 extend_props(self._parse_properties()) 1393 1394 if not index: 1395 break 1396 else: 1397 self._match(TokenType.COMMA) 1398 indexes.append(index) 1399 elif create_token.token_type == TokenType.VIEW: 1400 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1401 no_schema_binding = True 1402 1403 shallow = self._match_text_seq("SHALLOW") 1404 1405 if self._match_texts(self.CLONE_KEYWORDS): 1406 copy = self._prev.text.lower() == "copy" 1407 clone = self._parse_table(schema=True) 1408 when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper() 1409 clone_kind = ( 1410 self._match(TokenType.L_PAREN) 1411 and self._match_texts(self.CLONE_KINDS) 1412 and self._prev.text.upper() 1413 ) 1414 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1415 self._match(TokenType.R_PAREN) 1416 clone = self.expression( 1417 exp.Clone, 1418 this=clone, 1419 when=when, 1420 kind=clone_kind, 1421 shallow=shallow, 1422 expression=clone_expression, 1423 copy=copy, 1424 ) 1425 1426 return self.expression( 1427 exp.Create, 1428 comments=comments, 1429 this=this, 1430 kind=create_token.text, 1431 replace=replace, 1432 unique=unique, 1433 expression=expression, 1434 exists=exists, 1435 properties=properties, 1436 indexes=indexes, 1437 no_schema_binding=no_schema_binding, 1438 begin=begin, 1439 end=end, 1440 clone=clone, 1441 ) 1442 1443 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1444 # only used for teradata currently 1445 self._match(TokenType.COMMA) 1446 1447 kwargs = { 1448 "no": self._match_text_seq("NO"), 1449 "dual": self._match_text_seq("DUAL"), 1450 "before": self._match_text_seq("BEFORE"), 1451 "default": self._match_text_seq("DEFAULT"), 
1452 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1453 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1454 "after": self._match_text_seq("AFTER"), 1455 "minimum": self._match_texts(("MIN", "MINIMUM")), 1456 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1457 } 1458 1459 if self._match_texts(self.PROPERTY_PARSERS): 1460 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1461 try: 1462 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1463 except TypeError: 1464 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1465 1466 return None 1467 1468 def _parse_property(self) -> t.Optional[exp.Expression]: 1469 if self._match_texts(self.PROPERTY_PARSERS): 1470 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1471 1472 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1473 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1474 1475 if self._match_text_seq("COMPOUND", "SORTKEY"): 1476 return self._parse_sortkey(compound=True) 1477 1478 if self._match_text_seq("SQL", "SECURITY"): 1479 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1480 1481 index = self._index 1482 key = self._parse_column() 1483 1484 if not self._match(TokenType.EQ): 1485 self._retreat(index) 1486 return None 1487 1488 return self.expression( 1489 exp.Property, 1490 this=key.to_dot() if isinstance(key, exp.Column) else key, 1491 value=self._parse_column() or self._parse_var(any_token=True), 1492 ) 1493 1494 def _parse_stored(self) -> exp.FileFormatProperty: 1495 self._match(TokenType.ALIAS) 1496 1497 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1498 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1499 1500 return self.expression( 1501 exp.FileFormatProperty, 1502 this=self.expression( 1503 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1504 ) 1505 if input_format or output_format 1506 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1507 ) 1508 1509 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1510 self._match(TokenType.EQ) 1511 self._match(TokenType.ALIAS) 1512 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1513 1514 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1515 properties = [] 1516 while True: 1517 if before: 1518 prop = self._parse_property_before() 1519 else: 1520 prop = self._parse_property() 1521 1522 if not prop: 1523 break 1524 for p in ensure_list(prop): 1525 properties.append(p) 1526 1527 if properties: 1528 return self.expression(exp.Properties, expressions=properties) 1529 1530 return None 1531 1532 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1533 return self.expression( 1534 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1535 ) 1536 1537 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1538 if self._index >= 2: 1539 pre_volatile_token = self._tokens[self._index - 2] 1540 else: 1541 pre_volatile_token = None 1542 1543 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1544 return exp.VolatileProperty() 1545 1546 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1547 1548 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1549 

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
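
    # Illustrative sketch (assumption: Postgres dialect, per the CREATE TABLE docs
    # linked above; exact node layout may vary by version): PARTITION OF clauses
    # parse into a PartitionedOfProperty carrying a PartitionBoundSpec:
    #
    #     >>> import sqlglot
    #     >>> sql = "CREATE TABLE p1 PARTITION OF t FOR VALUES FROM (1) TO (10)"
    #     >>> prop = sqlglot.parse_one(sql, read="postgres").args["properties"].expressions[0]
    #     >>> type(prop).__name__
    #     'PartitionedOfProperty'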

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )
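
    # Illustrative usage (not part of the original source): _parse_insert returns
    # an exp.Insert whose "this" is the target schema/table and whose "expression"
    # holds the inserted query or VALUES list:
    #
    #     >>> import sqlglot
    #     >>> insert = sqlglot.parse_one("INSERT INTO t (a) VALUES (1)")
    #     >>> type(insert).__name__, type(insert.expression).__name__
    #     ('Insert', 'Values')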

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
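
    # Illustrative sketch (assumption: Postgres-style conflict clause): the parsed
    # exp.OnConflict ends up under the Insert node's "conflict" arg:
    #
    #     >>> import sqlglot
    #     >>> ins = sqlglot.parse_one(
    #     ...     "INSERT INTO t (a) VALUES (1) ON CONFLICT DO NOTHING", read="postgres"
    #     ... )
    #     >>> ins.args["conflict"].args.get("nothing")
    #     True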

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )
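
    # Illustrative usage (not part of the original source): _parse_delete keeps the
    # optional clauses as args on the exp.Delete node:
    #
    #     >>> import sqlglot
    #     >>> delete = sqlglot.parse_one("DELETE FROM t WHERE a = 1")
    #     >>> type(delete).__name__, delete.args["where"].sql()
    #     ('Delete', 'WHERE a = 1')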

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports a leading FROM clause, e.g. FROM x SELECT *
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)
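
    # Illustrative usage (not part of the original source): _parse_select builds the
    # core exp.Select and then hands trailing clauses to _parse_query_modifiers:
    #
    #     >>> import sqlglot
    #     >>> select = sqlglot.parse_one("SELECT DISTINCT a FROM t LIMIT 5")
    #     >>> sorted(key for key, value in select.args.items() if value)
    #     ['distinct', 'expressions', 'from', 'limit']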

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )
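
    # Illustrative usage (not part of the original source): _parse_with attaches the
    # CTE list to the statement that follows it via this.set("with", cte):
    #
    #     >>> import sqlglot
    #     >>> select = sqlglot.parse_one("WITH c AS (SELECT 1) SELECT * FROM c")
    #     >>> [cte.alias for cte in select.args["with"].expressions]
    #     ['c']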

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this
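
    # Illustrative sketch (node properties assumed from the public exp API): the
    # join side/kind tokens captured by _parse_join_parts surface on exp.Join:
    #
    #     >>> import sqlglot
    #     >>> select = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id")
    #     >>> select.args["joins"][0].side
    #     'LEFT'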

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
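
    # Illustrative usage (not part of the original source): a parenthesized VALUES
    # in a FROM clause goes through _parse_derived_table_values:
    #
    #     >>> import sqlglot
    #     >>> query = sqlglot.parse_one("SELECT * FROM (VALUES (1), (2)) AS t(a)")
    #     >>> type(query.args["from"].this).__name__
    #     'Values'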

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )
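
    # Illustrative sketch (assumption: the default dialect tokenizes TABLESAMPLE;
    # whether the number lands in "rows", "percent" or "size" depends on the token
    # that follows it, as parsed above):
    #
    #     >>> import sqlglot
    #     >>> query = sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (10 ROWS)")
    #     >>> type(query.args["from"].this).__name__
    #     'TableSample'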

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)
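
    # Illustrative usage (not part of the original source): _parse_group gathers
    # plain keys, GROUPING SETS, ROLLUP and CUBE into one exp.Group node:
    #
    #     >>> import sqlglot
    #     >>> query = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY a")
    #     >>> query.args["group"].expressions[0].sql()
    #     'a'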

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
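
    # Illustrative usage (not part of the original source): _parse_ordered records
    # direction and null ordering on each exp.Ordered key:
    #
    #     >>> import sqlglot
    #     >>> query = sqlglot.parse_one("SELECT a FROM t ORDER BY a DESC NULLS LAST")
    #     >>> ordered = query.args["order"].expressions[0]
    #     >>> ordered.args["desc"], ordered.args["nulls_first"]
    #     (True, False)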

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            comments=self._prev.comments,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
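
    # Illustrative usage (not part of the original source): per the "distinct" arg
    # computed above, a bare UNION is distinct while UNION ALL is not:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT 1 UNION SELECT 2").args["distinct"]
    #     True
    #     >>> sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2").args["distinct"]
    #     False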

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
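
    # Illustrative usage (not part of the original source): range operators such as
    # BETWEEN are dispatched through RANGE_PARSERS by _parse_range above:
    #
    #     >>> import sqlglot
    #     >>> where = sqlglot.parse_one("SELECT * FROM t WHERE a BETWEEN 1 AND 10").args["where"]
    #     >>> where.this.args["low"].sql(), where.this.args["high"].sql()
    #     ('1', '10')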
a "window side") 3378 unit = None 3379 self._retreat(self._index - 1) 3380 3381 this = exp.Literal.string(parts[0]) 3382 unit = self.expression(exp.Var, this=parts[1]) 3383 3384 return self.expression(exp.Interval, this=this, unit=unit) 3385 3386 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3387 this = self._parse_term() 3388 3389 while True: 3390 if self._match_set(self.BITWISE): 3391 this = self.expression( 3392 self.BITWISE[self._prev.token_type], 3393 this=this, 3394 expression=self._parse_term(), 3395 ) 3396 elif self._match(TokenType.DQMARK): 3397 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3398 elif self._match_pair(TokenType.LT, TokenType.LT): 3399 this = self.expression( 3400 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3401 ) 3402 elif self._match_pair(TokenType.GT, TokenType.GT): 3403 this = self.expression( 3404 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3405 ) 3406 else: 3407 break 3408 3409 return this 3410 3411 def _parse_term(self) -> t.Optional[exp.Expression]: 3412 return self._parse_tokens(self._parse_factor, self.TERM) 3413 3414 def _parse_factor(self) -> t.Optional[exp.Expression]: 3415 if self.EXPONENT: 3416 factor = self._parse_tokens(self._parse_exponent, self.FACTOR) 3417 else: 3418 factor = self._parse_tokens(self._parse_unary, self.FACTOR) 3419 if isinstance(factor, exp.Div): 3420 factor.args["typed"] = self.TYPED_DIVISION 3421 factor.args["safe"] = self.SAFE_DIVISION 3422 return factor 3423 3424 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3425 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3426 3427 def _parse_unary(self) -> t.Optional[exp.Expression]: 3428 if self._match_set(self.UNARY_PARSERS): 3429 return self.UNARY_PARSERS[self._prev.token_type](self) 3430 return self._parse_at_time_zone(self._parse_type()) 3431 3432 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3433 interval = parse_interval and self._parse_interval() 3434 if interval: 3435 return interval 3436 3437 index = self._index 3438 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3439 this = self._parse_column() 3440 3441 if data_type: 3442 if isinstance(this, exp.Literal): 3443 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3444 if parser: 3445 return parser(self, this, data_type) 3446 return self.expression(exp.Cast, this=this, to=data_type) 3447 if not data_type.expressions: 3448 self._retreat(index) 3449 return self._parse_column() 3450 return self._parse_column_ops(data_type) 3451 3452 return this and self._parse_column_ops(this) 3453 3454 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3455 this = self._parse_type() 3456 if not this: 3457 return None 3458 3459 return self.expression( 3460 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3461 ) 3462 3463 def _parse_types( 3464 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3465 ) -> t.Optional[exp.Expression]: 3466 index = self._index 3467 3468 prefix = self._match_text_seq("SYSUDTLIB", ".") 3469 3470 if not self._match_set(self.TYPE_TOKENS): 3471 identifier = allow_identifiers and self._parse_id_var( 3472 any_token=False, tokens=(TokenType.VAR,) 3473 ) 3474 3475 if identifier: 3476 tokens = self._tokenizer.tokenize(identifier.name) 3477 3478 if len(tokens) != 1: 3479 self.raise_error("Unexpected identifier", self._prev) 3480 3481 if tokens[0].token_type in self.TYPE_TOKENS: 3482 self._prev = 
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token
            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)
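`_parse_types` is what turns a raw type annotation into an `exp.DataType`, including nested and parameterized forms. A quick sketch through the public `exp.DataType.build` helper, which routes through this machinery (assuming the default dialect):

# Minimal sketch: building DataType nodes from type strings.
import sqlglot
from sqlglot import exp

dt = exp.DataType.build("ARRAY<STRUCT<a INT, b TEXT>>")
print(dt.this)  # Type.ARRAY

# Parameterized types keep their size arguments as expressions.
print(sqlglot.parse_one("CAST(x AS DECIMAL(10, 2))").sql())  # CAST(x AS DECIMAL(10, 2))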
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...), SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
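In `_parse_function_call`, names without a registered builder fall through to `exp.Anonymous`, so unknown UDF calls still round-trip. A brief sketch (the UDF name is made up for illustration):

# Minimal sketch: known functions get typed nodes, unknown ones become exp.Anonymous.
import sqlglot
from sqlglot import exp

known = sqlglot.parse_one("COALESCE(a, b)")
unknown = sqlglot.parse_one("MY_MADE_UP_UDF(a, b)")  # hypothetical UDF name

assert isinstance(known, exp.Coalesce)
assert isinstance(unknown, exp.Anonymous)
print(unknown.sql())  # MY_MADE_UP_UDF(a, b)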
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
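`_parse_column_def` collects the type and any trailing constraints for each column in a schema. A short sketch of the resulting tree:

# Minimal sketch: inspecting parsed column definitions and their constraints.
import sqlglot
from sqlglot import exp

ddl = sqlglot.parse_one("CREATE TABLE t (id INT NOT NULL, name TEXT)")
for col in ddl.find_all(exp.ColumnDef):
    print(col.name, col.args["kind"].sql(), [c.sql() for c in col.args.get("constraints", [])])
# id INT ['NOT NULL']
# name TEXT []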
self._match_text_seq("ROW"): 3983 start = self._match_text_seq("START") 3984 if not start: 3985 self._match(TokenType.END) 3986 hidden = self._match_text_seq("HIDDEN") 3987 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 3988 3989 identity = self._match_text_seq("IDENTITY") 3990 3991 if self._match(TokenType.L_PAREN): 3992 if self._match(TokenType.START_WITH): 3993 this.set("start", self._parse_bitwise()) 3994 if self._match_text_seq("INCREMENT", "BY"): 3995 this.set("increment", self._parse_bitwise()) 3996 if self._match_text_seq("MINVALUE"): 3997 this.set("minvalue", self._parse_bitwise()) 3998 if self._match_text_seq("MAXVALUE"): 3999 this.set("maxvalue", self._parse_bitwise()) 4000 4001 if self._match_text_seq("CYCLE"): 4002 this.set("cycle", True) 4003 elif self._match_text_seq("NO", "CYCLE"): 4004 this.set("cycle", False) 4005 4006 if not identity: 4007 this.set("expression", self._parse_bitwise()) 4008 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4009 args = self._parse_csv(self._parse_bitwise) 4010 this.set("start", seq_get(args, 0)) 4011 this.set("increment", seq_get(args, 1)) 4012 4013 self._match_r_paren() 4014 4015 return this 4016 4017 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4018 self._match_text_seq("LENGTH") 4019 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4020 4021 def _parse_not_constraint( 4022 self, 4023 ) -> t.Optional[exp.Expression]: 4024 if self._match_text_seq("NULL"): 4025 return self.expression(exp.NotNullColumnConstraint) 4026 if self._match_text_seq("CASESPECIFIC"): 4027 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4028 if self._match_text_seq("FOR", "REPLICATION"): 4029 return self.expression(exp.NotForReplicationColumnConstraint) 4030 return None 4031 4032 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4033 if self._match(TokenType.CONSTRAINT): 4034 this = self._parse_id_var() 4035 else: 4036 this = None 4037 4038 if self._match_texts(self.CONSTRAINT_PARSERS): 4039 return self.expression( 4040 exp.ColumnConstraint, 4041 this=this, 4042 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4043 ) 4044 4045 return this 4046 4047 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4048 if not self._match(TokenType.CONSTRAINT): 4049 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4050 4051 this = self._parse_id_var() 4052 expressions = [] 4053 4054 while True: 4055 constraint = self._parse_unnamed_constraint() or self._parse_function() 4056 if not constraint: 4057 break 4058 expressions.append(constraint) 4059 4060 return self.expression(exp.Constraint, this=this, expressions=expressions) 4061 4062 def _parse_unnamed_constraint( 4063 self, constraints: t.Optional[t.Collection[str]] = None 4064 ) -> t.Optional[exp.Expression]: 4065 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4066 constraints or self.CONSTRAINT_PARSERS 4067 ): 4068 return None 4069 4070 constraint = self._prev.text.upper() 4071 if constraint not in self.CONSTRAINT_PARSERS: 4072 self.raise_error(f"No parser found for schema constraint {constraint}.") 4073 4074 return self.CONSTRAINT_PARSERS[constraint](self) 4075 4076 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4077 self._match_text_seq("KEY") 4078 return self.expression( 4079 exp.UniqueColumnConstraint, 4080 this=self._parse_schema(self._parse_id_var(any_token=False)), 4081 
    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint:
        self._match(TokenType.TIMESTAMP_SNAPSHOT)

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )
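`_parse_foreign_key` folds the `ON DELETE` / `ON UPDATE` actions into keyword args on the resulting `exp.ForeignKey` node. A short sketch:

# Minimal sketch: foreign keys keep their referential actions as plain-string args.
import sqlglot
from sqlglot import exp

ddl = sqlglot.parse_one(
    "CREATE TABLE t (a INT, FOREIGN KEY (a) REFERENCES u (b) ON DELETE CASCADE)"
)
fk = ddl.find(exp.ForeignKey)
print(fk.args.get("delete"))  # expected: CASCADE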
    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )
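`_parse_bracket` also normalizes subscript indices across dialects via `apply_index_offset`, so 1-based and 0-based array accesses transpile correctly. A small sketch, assuming DuckDB's 1-based and Spark's 0-based indexing:

# Minimal sketch: array subscripts are re-based when transpiling between
# dialects with different INDEX_OFFSET settings.
import sqlglot

print(sqlglot.transpile("SELECT x[1]", read="duckdb", write="spark")[0])
# expected: SELECT x[0]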
    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )
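`_parse_cast` is shared by `CAST` and `TRY_CAST`; the `strict` flag decides which node is built. A quick sketch:

# Minimal sketch: CAST vs. TRY_CAST produce different node types.
import sqlglot
from sqlglot import exp

assert isinstance(sqlglot.parse_one("CAST(x AS INT)"), exp.Cast)
assert isinstance(sqlglot.parse_one("TRY_CAST(x AS INT)"), exp.TryCast)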
    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
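Because `_parse_string_agg` canonicalizes the Postgres and SQL Server spellings into `exp.GroupConcat`, rendering MySQL's `GROUP_CONCAT` becomes a generator concern. A sketch (the exact separator syntax in the output comes from the MySQL generator):

# Minimal sketch: STRING_AGG is parsed into exp.GroupConcat and can be
# re-rendered as MySQL's GROUP_CONCAT.
import sqlglot

sql = "SELECT STRING_AGG(x, ',') FROM t"
print(sqlglot.transpile(sql, read="postgres", write="mysql")[0])
# expected: SELECT GROUP_CONCAT(x SEPARATOR ',') FROM t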
4429 """ 4430 args = self._parse_csv(self._parse_conjunction) 4431 4432 if len(args) < 3: 4433 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4434 4435 expression, *expressions = args 4436 if not expression: 4437 return None 4438 4439 ifs = [] 4440 for search, result in zip(expressions[::2], expressions[1::2]): 4441 if not search or not result: 4442 return None 4443 4444 if isinstance(search, exp.Literal): 4445 ifs.append( 4446 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4447 ) 4448 elif isinstance(search, exp.Null): 4449 ifs.append( 4450 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4451 ) 4452 else: 4453 cond = exp.or_( 4454 exp.EQ(this=expression.copy(), expression=search), 4455 exp.and_( 4456 exp.Is(this=expression.copy(), expression=exp.Null()), 4457 exp.Is(this=search.copy(), expression=exp.Null()), 4458 copy=False, 4459 ), 4460 copy=False, 4461 ) 4462 ifs.append(exp.If(this=cond, true=result)) 4463 4464 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4465 4466 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4467 self._match_text_seq("KEY") 4468 key = self._parse_column() 4469 self._match_set((TokenType.COLON, TokenType.COMMA)) 4470 self._match_text_seq("VALUE") 4471 value = self._parse_bitwise() 4472 4473 if not key and not value: 4474 return None 4475 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4476 4477 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4478 if not this or not self._match_text_seq("FORMAT", "JSON"): 4479 return this 4480 4481 return self.expression(exp.FormatJson, this=this) 4482 4483 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4484 # Parses the "X ON Y" syntax, i.e. 
    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )
    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )
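`_parse_trim` reconciles the `TRIM([LEADING | TRAILING | BOTH] pattern FROM string)` form with the plain argument-list form, swapping the operands where needed. A brief sketch:

# Minimal sketch: both TRIM spellings land in the same exp.Trim shape.
import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one("TRIM(BOTH 'x' FROM y)")
assert isinstance(tree, exp.Trim)
print(tree.args["position"])  # BOTH
print(tree.this.sql())        # y  (the string being trimmed)
print(tree.expression.sql())  # 'x' (the trim pattern)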
    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this
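`_parse_window` (below) handles FILTER, WITHIN GROUP, IGNORE/RESPECT NULLS, named windows and frame specs. A sketch of the frame-spec portion of the tree it builds:

# Minimal sketch: window frames end up in exp.WindowSpec with start/end sides.
import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one(
    "SELECT SUM(x) OVER (PARTITION BY y ORDER BY z "
    "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
)
spec = tree.find(exp.WindowSpec)
print(spec.args["kind"])        # ROWS
print(spec.args["start"])       # UNBOUNDED
print(spec.args["start_side"])  # PRECEDING
print(spec.args["end"])         # CURRENT ROW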
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.STRING, TokenType.RAW_STRING)):
            return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()
    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())
self._match(TokenType.COLUMN) 5066 exists_column = self._parse_exists(not_=True) 5067 expression = self._parse_field_def() 5068 5069 if expression: 5070 expression.set("exists", exists_column) 5071 5072 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5073 if self._match_texts(("FIRST", "AFTER")): 5074 position = self._prev.text 5075 column_position = self.expression( 5076 exp.ColumnPosition, this=self._parse_column(), position=position 5077 ) 5078 expression.set("position", column_position) 5079 5080 return expression 5081 5082 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5083 drop = self._match(TokenType.DROP) and self._parse_drop() 5084 if drop and not isinstance(drop, exp.Command): 5085 drop.set("kind", drop.args.get("kind", "COLUMN")) 5086 return drop 5087 5088 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5089 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5090 return self.expression( 5091 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5092 ) 5093 5094 def _parse_add_constraint(self) -> exp.AddConstraint: 5095 this = None 5096 kind = self._prev.token_type 5097 5098 if kind == TokenType.CONSTRAINT: 5099 this = self._parse_id_var() 5100 5101 if self._match_text_seq("CHECK"): 5102 expression = self._parse_wrapped(self._parse_conjunction) 5103 enforced = self._match_text_seq("ENFORCED") 5104 5105 return self.expression( 5106 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5107 ) 5108 5109 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5110 expression = self._parse_foreign_key() 5111 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5112 expression = self._parse_primary_key() 5113 else: 5114 expression = None 5115 5116 return self.expression(exp.AddConstraint, this=this, expression=expression) 5117 5118 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5119 index = self._index - 1 5120 5121 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5122 return self._parse_csv(self._parse_add_constraint) 5123 5124 self._retreat(index) 5125 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 5126 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5127 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5128 5129 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5130 self._match(TokenType.COLUMN) 5131 column = self._parse_field(any_token=True) 5132 5133 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5134 return self.expression(exp.AlterColumn, this=column, drop=True) 5135 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5136 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5137 5138 self._match_text_seq("SET", "DATA") 5139 return self.expression( 5140 exp.AlterColumn, 5141 this=column, 5142 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5143 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5144 using=self._match(TokenType.USING) and self._parse_conjunction(), 5145 ) 5146 5147 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5148 index = self._index - 1 5149 5150 partition_exists = self._parse_exists() 5151 if self._match(TokenType.PARTITION, advance=False): 5152 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5153 5154 self._retreat(index) 5155 return 
self._parse_csv(self._parse_drop_column) 5156 5157 def _parse_alter_table_rename(self) -> exp.RenameTable: 5158 self._match_text_seq("TO") 5159 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5160 5161 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5162 start = self._prev 5163 5164 if not self._match(TokenType.TABLE): 5165 return self._parse_as_command(start) 5166 5167 exists = self._parse_exists() 5168 only = self._match_text_seq("ONLY") 5169 this = self._parse_table(schema=True) 5170 5171 if self._next: 5172 self._advance() 5173 5174 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5175 if parser: 5176 actions = ensure_list(parser(self)) 5177 5178 if not self._curr: 5179 return self.expression( 5180 exp.AlterTable, 5181 this=this, 5182 exists=exists, 5183 actions=actions, 5184 only=only, 5185 ) 5186 5187 return self._parse_as_command(start) 5188 5189 def _parse_merge(self) -> exp.Merge: 5190 self._match(TokenType.INTO) 5191 target = self._parse_table() 5192 5193 if target and self._match(TokenType.ALIAS, advance=False): 5194 target.set("alias", self._parse_table_alias()) 5195 5196 self._match(TokenType.USING) 5197 using = self._parse_table() 5198 5199 self._match(TokenType.ON) 5200 on = self._parse_conjunction() 5201 5202 return self.expression( 5203 exp.Merge, 5204 this=target, 5205 using=using, 5206 on=on, 5207 expressions=self._parse_when_matched(), 5208 ) 5209 5210 def _parse_when_matched(self) -> t.List[exp.When]: 5211 whens = [] 5212 5213 while self._match(TokenType.WHEN): 5214 matched = not self._match(TokenType.NOT) 5215 self._match_text_seq("MATCHED") 5216 source = ( 5217 False 5218 if self._match_text_seq("BY", "TARGET") 5219 else self._match_text_seq("BY", "SOURCE") 5220 ) 5221 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5222 5223 self._match(TokenType.THEN) 5224 5225 if self._match(TokenType.INSERT): 5226 _this = self._parse_star() 5227 if _this: 5228 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5229 else: 5230 then = self.expression( 5231 exp.Insert, 5232 this=self._parse_value(), 5233 expression=self._match(TokenType.VALUES) and self._parse_value(), 5234 ) 5235 elif self._match(TokenType.UPDATE): 5236 expressions = self._parse_star() 5237 if expressions: 5238 then = self.expression(exp.Update, expressions=expressions) 5239 else: 5240 then = self.expression( 5241 exp.Update, 5242 expressions=self._match(TokenType.SET) 5243 and self._parse_csv(self._parse_equality), 5244 ) 5245 elif self._match(TokenType.DELETE): 5246 then = self.expression(exp.Var, this=self._prev.text) 5247 else: 5248 then = None 5249 5250 whens.append( 5251 self.expression( 5252 exp.When, 5253 matched=matched, 5254 source=source, 5255 condition=condition, 5256 then=then, 5257 ) 5258 ) 5259 return whens 5260 5261 def _parse_show(self) -> t.Optional[exp.Expression]: 5262 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5263 if parser: 5264 return parser(self) 5265 return self._parse_as_command(self._prev) 5266 5267 def _parse_set_item_assignment( 5268 self, kind: t.Optional[str] = None 5269 ) -> t.Optional[exp.Expression]: 5270 index = self._index 5271 5272 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5273 return self._parse_set_transaction(global_=kind == "GLOBAL") 5274 5275 left = self._parse_primary() or self._parse_id_var() 5276 assignment_delimiter = self._match_texts(("=", "TO")) 5277 5278 if not left or 
(self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5279 self._retreat(index) 5280 return None 5281 5282 right = self._parse_statement() or self._parse_id_var() 5283 this = self.expression(exp.EQ, this=left, expression=right) 5284 5285 return self.expression(exp.SetItem, this=this, kind=kind) 5286 5287 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5288 self._match_text_seq("TRANSACTION") 5289 characteristics = self._parse_csv( 5290 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5291 ) 5292 return self.expression( 5293 exp.SetItem, 5294 expressions=characteristics, 5295 kind="TRANSACTION", 5296 **{"global": global_}, # type: ignore 5297 ) 5298 5299 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5300 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5301 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5302 5303 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5304 index = self._index 5305 set_ = self.expression( 5306 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5307 ) 5308 5309 if self._curr: 5310 self._retreat(index) 5311 return self._parse_as_command(self._prev) 5312 5313 return set_ 5314 5315 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5316 for option in options: 5317 if self._match_text_seq(*option.split(" ")): 5318 return exp.var(option) 5319 return None 5320 5321 def _parse_as_command(self, start: Token) -> exp.Command: 5322 while self._curr: 5323 self._advance() 5324 text = self._find_sql(start, self._prev) 5325 size = len(start.text) 5326 return exp.Command(this=text[:size], expression=text[size:]) 5327 5328 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5329 settings = [] 5330 5331 self._match_l_paren() 5332 kind = self._parse_id_var() 5333 5334 if self._match(TokenType.L_PAREN): 5335 while True: 5336 key = self._parse_id_var() 5337 value = self._parse_primary() 5338 5339 if not key and value is None: 5340 break 5341 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5342 self._match(TokenType.R_PAREN) 5343 5344 self._match_r_paren() 5345 5346 return self.expression( 5347 exp.DictProperty, 5348 this=this, 5349 kind=kind.this if kind else None, 5350 settings=settings, 5351 ) 5352 5353 def _parse_dict_range(self, this: str) -> exp.DictRange: 5354 self._match_l_paren() 5355 has_min = self._match_text_seq("MIN") 5356 if has_min: 5357 min = self._parse_var() or self._parse_primary() 5358 self._match_text_seq("MAX") 5359 max = self._parse_var() or self._parse_primary() 5360 else: 5361 max = self._parse_var() or self._parse_primary() 5362 min = exp.Literal.number(0) 5363 self._match_r_paren() 5364 return self.expression(exp.DictRange, this=this, min=min, max=max) 5365 5366 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5367 index = self._index 5368 expression = self._parse_column() 5369 if not self._match(TokenType.IN): 5370 self._retreat(index - 1) 5371 return None 5372 iterator = self._parse_column() 5373 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5374 return self.expression( 5375 exp.Comprehension, 5376 this=this, 5377 expression=expression, 5378 iterator=iterator, 5379 condition=condition, 5380 ) 5381 5382 def _find_parser( 5383 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5384 ) -> t.Optional[t.Callable]: 5385 if not self._curr: 
5386 return None 5387 5388 index = self._index 5389 this = [] 5390 while True: 5391 # The current token might be multiple words 5392 curr = self._curr.text.upper() 5393 key = curr.split(" ") 5394 this.append(curr) 5395 5396 self._advance() 5397 result, trie = in_trie(trie, key) 5398 if result == TrieResult.FAILED: 5399 break 5400 5401 if result == TrieResult.EXISTS: 5402 subparser = parsers[" ".join(this)] 5403 return subparser 5404 5405 self._retreat(index) 5406 return None 5407 5408 def _match(self, token_type, advance=True, expression=None): 5409 if not self._curr: 5410 return None 5411 5412 if self._curr.token_type == token_type: 5413 if advance: 5414 self._advance() 5415 self._add_comments(expression) 5416 return True 5417 5418 return None 5419 5420 def _match_set(self, types, advance=True): 5421 if not self._curr: 5422 return None 5423 5424 if self._curr.token_type in types: 5425 if advance: 5426 self._advance() 5427 return True 5428 5429 return None 5430 5431 def _match_pair(self, token_type_a, token_type_b, advance=True): 5432 if not self._curr or not self._next: 5433 return None 5434 5435 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5436 if advance: 5437 self._advance(2) 5438 return True 5439 5440 return None 5441 5442 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5443 if not self._match(TokenType.L_PAREN, expression=expression): 5444 self.raise_error("Expecting (") 5445 5446 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5447 if not self._match(TokenType.R_PAREN, expression=expression): 5448 self.raise_error("Expecting )") 5449 5450 def _match_texts(self, texts, advance=True): 5451 if self._curr and self._curr.text.upper() in texts: 5452 if advance: 5453 self._advance() 5454 return True 5455 return False 5456 5457 def _match_text_seq(self, *texts, advance=True): 5458 index = self._index 5459 for text in texts: 5460 if self._curr and self._curr.text.upper() == text: 5461 self._advance() 5462 else: 5463 self._retreat(index) 5464 return False 5465 5466 if not advance: 5467 self._retreat(index) 5468 5469 return True 5470 5471 @t.overload 5472 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5473 ... 5474 5475 @t.overload 5476 def _replace_columns_with_dots( 5477 self, this: t.Optional[exp.Expression] 5478 ) -> t.Optional[exp.Expression]: 5479 ... 
5480 5481 def _replace_columns_with_dots(self, this): 5482 if isinstance(this, exp.Dot): 5483 exp.replace_children(this, self._replace_columns_with_dots) 5484 elif isinstance(this, exp.Column): 5485 exp.replace_children(this, self._replace_columns_with_dots) 5486 table = this.args.get("table") 5487 this = ( 5488 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5489 ) 5490 5491 return this 5492 5493 def _replace_lambda( 5494 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5495 ) -> t.Optional[exp.Expression]: 5496 if not node: 5497 return node 5498 5499 for column in node.find_all(exp.Column): 5500 if column.parts[0].name in lambda_variables: 5501 dot_or_id = column.to_dot() if column.table else column.this 5502 parent = column.parent 5503 5504 while isinstance(parent, exp.Dot): 5505 if not isinstance(parent.parent, exp.Dot): 5506 parent.replace(dot_or_id) 5507 break 5508 parent = parent.parent 5509 else: 5510 if column is node: 5511 node = dot_or_id 5512 else: 5513 column.replace(dot_or_id) 5514 return node 5515 5516 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5517 return [ 5518 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5519 for value in values 5520 if value 5521 ]
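The matching primitives at the bottom (_match, _match_set, _match_pair, _match_texts, _match_text_seq) and the list helpers (_parse_csv, _parse_wrapped_csv) are the building blocks that the statement parsers above compose. Their combined effect is easiest to observe through sqlglot's public API; the following is a sketch, and the exact node shapes can vary between sqlglot versions:

    import sqlglot
    from sqlglot import exp

    # _parse_merge consumes INTO/USING/ON via _match and collects the WHEN
    # clauses with _parse_when_matched, yielding exp.Merge with exp.When children.
    merge = sqlglot.parse_one(
        "MERGE INTO t USING s ON t.id = s.id "
        "WHEN MATCHED THEN UPDATE SET t.v = s.v "
        "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    )
    assert isinstance(merge, exp.Merge)
    assert all(isinstance(when, exp.When) for when in merge.expressions)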
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
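These three settings map directly onto the constructor arguments shown below. A minimal sketch (in normal use, a dialect constructs its own Parser subclass for you):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # The defaults: ErrorLevel.IMMEDIATE, 100 characters of context, 3 messages.
    default_parser = Parser()

    # Collect errors while parsing and raise them together at the end,
    # with at most 5 messages and 50 characters of context around each.
    strict_parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)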
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
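A usage sketch, pairing the parser with the tokenizer it expects (one tree per semicolon-separated statement):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    tokens = Tokenizer().tokenize(sql)
    trees = Parser().parse(tokens, sql=sql)

    print(len(trees))      # 2 -- one syntax tree per statement
    print(trees[0].sql())  # SELECT 1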
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
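A sketch of both the single-type and collection forms, assuming (as in the base parser) that exp.Table and exp.Condition are registered in EXPRESSION_PARSERS:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    parser = Parser()

    # Parse the token list directly into a Table node.
    table = parser.parse_into(exp.Table, Tokenizer().tokenize("db.schema1.orders"))[0]
    assert isinstance(table, exp.Table)

    # With a collection, the first type that parses cleanly wins; here the
    # leftover tokens make exp.Table fail, so exp.Condition is tried next.
    cond = parser.parse_into(
        (exp.Table, exp.Condition), Tokenizer().tokenize("x > 1 AND y > 2")
    )[0]
    assert isinstance(cond, exp.And)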
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
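A sketch of the WARN behavior, where errors are logged and kept on self.errors rather than raised (this assumes "SELECT 1 +" trips validation by leaving exp.Add without a right-hand side, which holds in recent sqlglot versions):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1 +"
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql=sql)  # logs instead of raising

    print(len(parser.errors) > 0)  # True: the errors remain inspectable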
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
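Each recorded ParseError carries the structured fields built here, which callers can inspect. A sketch, assuming the unterminated subquery below triggers "Expecting )":

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT * FROM (SELECT 1"
    try:
        Parser(error_level=ErrorLevel.IMMEDIATE).parse(Tokenizer().tokenize(sql), sql=sql)
    except ParseError as e:
        err = e.errors[0]  # dict with description, line, col, highlight, ...
        print(err["line"], err["col"], err["highlight"])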
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
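Because expression routes every new node through comment attachment and validate_expression, it is the constructor to use when extending the parser. A hypothetical sketch (MyParser and _parse_negated are illustrative names, not part of sqlglot):

    from sqlglot import exp
    from sqlglot.parser import Parser

    class MyParser(Parser):
        def _parse_negated(self) -> exp.Not:
            # exp.Not requires "this"; a missing operand would be reported
            # through raise_error during validation.
            return self.expression(exp.Not, this=self._parse_column())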
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
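A sketch of validation outside of parsing: an exp.Cast built without its mandatory "to" argument is rejected unless the error level is IGNORE:

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser

    cast = exp.Cast(this=exp.column("x"))  # "to" is mandatory and missing

    try:
        Parser(error_level=ErrorLevel.IMMEDIATE).validate_expression(cast)
    except ParseError as e:
        print(e)  # reports the missing required keyword (message wording may vary)

    # With IGNORE, the invalid node is returned unchanged.
    Parser(error_level=ErrorLevel.IGNORE).validate_expression(cast)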