# sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from a flat argument list.

    A single star argument (e.g. ``*``) produces a ``StarMap``; otherwise the
    arguments are interpreted as alternating key/value pairs and packed into a
    ``VarMap`` of two parallel ``Array`` expressions.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    # Arguments alternate key, value, key, value, ...
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a LIKE expression, optionally wrapped in ESCAPE.

    Note the first two arguments are swapped when constructing the ``Like``
    node (``this`` comes from ``args[1]``, ``expression`` from ``args[0]``);
    a third argument, if present, becomes the ESCAPE character.
    """
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a range-operator parser that builds ``expr_type`` binary nodes.

    The produced callable parses the right-hand side with ``_parse_bitwise``
    and passes the result through ``_parse_escape`` (so a trailing ESCAPE
    clause, if any, can wrap the expression).
    """
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    """Metaclass that precomputes lookup tries for each Parser subclass."""

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        # Build word-sequence tries from the multi-word keys of the SHOW/SET
        # parser tables, so e.g. "SHOW GLOBAL STATUS" can be matched token
        # by token at parse time.
        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 
TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.MEDIUMINT, 140 TokenType.UMEDIUMINT, 141 TokenType.FIXEDSTRING, 142 TokenType.FLOAT, 143 TokenType.DOUBLE, 144 TokenType.CHAR, 145 TokenType.NCHAR, 146 TokenType.VARCHAR, 147 TokenType.NVARCHAR, 148 TokenType.TEXT, 149 TokenType.MEDIUMTEXT, 150 TokenType.LONGTEXT, 151 TokenType.MEDIUMBLOB, 152 TokenType.LONGBLOB, 153 TokenType.BINARY, 154 TokenType.VARBINARY, 155 TokenType.JSON, 156 TokenType.JSONB, 157 TokenType.INTERVAL, 158 TokenType.TINYBLOB, 159 TokenType.TINYTEXT, 160 TokenType.TIME, 161 TokenType.TIMETZ, 162 TokenType.TIMESTAMP, 163 TokenType.TIMESTAMP_S, 164 TokenType.TIMESTAMP_MS, 165 TokenType.TIMESTAMP_NS, 166 TokenType.TIMESTAMPTZ, 167 TokenType.TIMESTAMPLTZ, 168 TokenType.DATETIME, 169 TokenType.DATETIME64, 170 TokenType.DATE, 171 TokenType.INT4RANGE, 172 TokenType.INT4MULTIRANGE, 173 TokenType.INT8RANGE, 174 TokenType.INT8MULTIRANGE, 175 TokenType.NUMRANGE, 176 TokenType.NUMMULTIRANGE, 177 TokenType.TSRANGE, 178 TokenType.TSMULTIRANGE, 179 TokenType.TSTZRANGE, 180 TokenType.TSTZMULTIRANGE, 181 TokenType.DATERANGE, 182 TokenType.DATEMULTIRANGE, 183 TokenType.DECIMAL, 184 TokenType.UDECIMAL, 185 TokenType.BIGDECIMAL, 186 TokenType.UUID, 187 TokenType.GEOGRAPHY, 188 TokenType.GEOMETRY, 189 TokenType.HLLSKETCH, 190 TokenType.HSTORE, 191 TokenType.PSEUDO_TYPE, 192 TokenType.SUPER, 193 TokenType.SERIAL, 194 TokenType.SMALLSERIAL, 195 TokenType.BIGSERIAL, 196 TokenType.XML, 197 TokenType.YEAR, 198 TokenType.UNIQUEIDENTIFIER, 199 TokenType.USERDEFINED, 200 TokenType.MONEY, 201 TokenType.SMALLMONEY, 202 TokenType.ROWVERSION, 203 TokenType.IMAGE, 204 TokenType.VARIANT, 205 TokenType.OBJECT, 206 TokenType.OBJECT_IDENTIFIER, 207 TokenType.INET, 208 TokenType.IPADDRESS, 209 TokenType.IPPREFIX, 210 TokenType.UNKNOWN, 211 TokenType.NULL, 212 *ENUM_TYPE_TOKENS, 213 *NESTED_TYPE_TOKENS, 214 } 215 216 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 217 TokenType.BIGINT: TokenType.UBIGINT, 218 
TokenType.INT: TokenType.UINT, 219 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 220 TokenType.SMALLINT: TokenType.USMALLINT, 221 TokenType.TINYINT: TokenType.UTINYINT, 222 TokenType.DECIMAL: TokenType.UDECIMAL, 223 } 224 225 SUBQUERY_PREDICATES = { 226 TokenType.ANY: exp.Any, 227 TokenType.ALL: exp.All, 228 TokenType.EXISTS: exp.Exists, 229 TokenType.SOME: exp.Any, 230 } 231 232 RESERVED_KEYWORDS = { 233 *Tokenizer.SINGLE_TOKENS.values(), 234 TokenType.SELECT, 235 } 236 237 DB_CREATABLES = { 238 TokenType.DATABASE, 239 TokenType.SCHEMA, 240 TokenType.TABLE, 241 TokenType.VIEW, 242 TokenType.MODEL, 243 TokenType.DICTIONARY, 244 } 245 246 CREATABLES = { 247 TokenType.COLUMN, 248 TokenType.CONSTRAINT, 249 TokenType.FUNCTION, 250 TokenType.INDEX, 251 TokenType.PROCEDURE, 252 *DB_CREATABLES, 253 } 254 255 # Tokens that can represent identifiers 256 ID_VAR_TOKENS = { 257 TokenType.VAR, 258 TokenType.ANTI, 259 TokenType.APPLY, 260 TokenType.ASC, 261 TokenType.AUTO_INCREMENT, 262 TokenType.BEGIN, 263 TokenType.CACHE, 264 TokenType.CASE, 265 TokenType.COLLATE, 266 TokenType.COMMAND, 267 TokenType.COMMENT, 268 TokenType.COMMIT, 269 TokenType.CONSTRAINT, 270 TokenType.DEFAULT, 271 TokenType.DELETE, 272 TokenType.DESC, 273 TokenType.DESCRIBE, 274 TokenType.DICTIONARY, 275 TokenType.DIV, 276 TokenType.END, 277 TokenType.EXECUTE, 278 TokenType.ESCAPE, 279 TokenType.FALSE, 280 TokenType.FIRST, 281 TokenType.FILTER, 282 TokenType.FORMAT, 283 TokenType.FULL, 284 TokenType.IS, 285 TokenType.ISNULL, 286 TokenType.INTERVAL, 287 TokenType.KEEP, 288 TokenType.KILL, 289 TokenType.LEFT, 290 TokenType.LOAD, 291 TokenType.MERGE, 292 TokenType.NATURAL, 293 TokenType.NEXT, 294 TokenType.OFFSET, 295 TokenType.ORDINALITY, 296 TokenType.OVERLAPS, 297 TokenType.OVERWRITE, 298 TokenType.PARTITION, 299 TokenType.PERCENT, 300 TokenType.PIVOT, 301 TokenType.PRAGMA, 302 TokenType.RANGE, 303 TokenType.RECURSIVE, 304 TokenType.REFERENCES, 305 TokenType.REFRESH, 306 TokenType.RIGHT, 307 TokenType.ROW, 308 
TokenType.ROWS, 309 TokenType.SEMI, 310 TokenType.SET, 311 TokenType.SETTINGS, 312 TokenType.SHOW, 313 TokenType.TEMPORARY, 314 TokenType.TOP, 315 TokenType.TRUE, 316 TokenType.UNIQUE, 317 TokenType.UNPIVOT, 318 TokenType.UPDATE, 319 TokenType.USE, 320 TokenType.VOLATILE, 321 TokenType.WINDOW, 322 *CREATABLES, 323 *SUBQUERY_PREDICATES, 324 *TYPE_TOKENS, 325 *NO_PAREN_FUNCTIONS, 326 } 327 328 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 329 330 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 331 TokenType.ANTI, 332 TokenType.APPLY, 333 TokenType.ASOF, 334 TokenType.FULL, 335 TokenType.LEFT, 336 TokenType.LOCK, 337 TokenType.NATURAL, 338 TokenType.OFFSET, 339 TokenType.RIGHT, 340 TokenType.SEMI, 341 TokenType.WINDOW, 342 } 343 344 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 345 346 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 347 348 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 349 350 FUNC_TOKENS = { 351 TokenType.COLLATE, 352 TokenType.COMMAND, 353 TokenType.CURRENT_DATE, 354 TokenType.CURRENT_DATETIME, 355 TokenType.CURRENT_TIMESTAMP, 356 TokenType.CURRENT_TIME, 357 TokenType.CURRENT_USER, 358 TokenType.FILTER, 359 TokenType.FIRST, 360 TokenType.FORMAT, 361 TokenType.GLOB, 362 TokenType.IDENTIFIER, 363 TokenType.INDEX, 364 TokenType.ISNULL, 365 TokenType.ILIKE, 366 TokenType.INSERT, 367 TokenType.LIKE, 368 TokenType.MERGE, 369 TokenType.OFFSET, 370 TokenType.PRIMARY_KEY, 371 TokenType.RANGE, 372 TokenType.REPLACE, 373 TokenType.RLIKE, 374 TokenType.ROW, 375 TokenType.UNNEST, 376 TokenType.VAR, 377 TokenType.LEFT, 378 TokenType.RIGHT, 379 TokenType.DATE, 380 TokenType.DATETIME, 381 TokenType.TABLE, 382 TokenType.TIMESTAMP, 383 TokenType.TIMESTAMPTZ, 384 TokenType.WINDOW, 385 TokenType.XOR, 386 *TYPE_TOKENS, 387 *SUBQUERY_PREDICATES, 388 } 389 390 CONJUNCTION = { 391 TokenType.AND: exp.And, 392 TokenType.OR: exp.Or, 393 } 394 395 EQUALITY = { 396 TokenType.COLON_EQ: exp.PropertyEQ, 397 TokenType.EQ: exp.EQ, 398 TokenType.NEQ: exp.NEQ, 
399 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 400 } 401 402 COMPARISON = { 403 TokenType.GT: exp.GT, 404 TokenType.GTE: exp.GTE, 405 TokenType.LT: exp.LT, 406 TokenType.LTE: exp.LTE, 407 } 408 409 BITWISE = { 410 TokenType.AMP: exp.BitwiseAnd, 411 TokenType.CARET: exp.BitwiseXor, 412 TokenType.PIPE: exp.BitwiseOr, 413 TokenType.DPIPE: exp.DPipe, 414 } 415 416 TERM = { 417 TokenType.DASH: exp.Sub, 418 TokenType.PLUS: exp.Add, 419 TokenType.MOD: exp.Mod, 420 TokenType.COLLATE: exp.Collate, 421 } 422 423 FACTOR = { 424 TokenType.DIV: exp.IntDiv, 425 TokenType.LR_ARROW: exp.Distance, 426 TokenType.SLASH: exp.Div, 427 TokenType.STAR: exp.Mul, 428 } 429 430 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 431 432 TIMES = { 433 TokenType.TIME, 434 TokenType.TIMETZ, 435 } 436 437 TIMESTAMPS = { 438 TokenType.TIMESTAMP, 439 TokenType.TIMESTAMPTZ, 440 TokenType.TIMESTAMPLTZ, 441 *TIMES, 442 } 443 444 SET_OPERATIONS = { 445 TokenType.UNION, 446 TokenType.INTERSECT, 447 TokenType.EXCEPT, 448 } 449 450 JOIN_METHODS = { 451 TokenType.NATURAL, 452 TokenType.ASOF, 453 } 454 455 JOIN_SIDES = { 456 TokenType.LEFT, 457 TokenType.RIGHT, 458 TokenType.FULL, 459 } 460 461 JOIN_KINDS = { 462 TokenType.INNER, 463 TokenType.OUTER, 464 TokenType.CROSS, 465 TokenType.SEMI, 466 TokenType.ANTI, 467 } 468 469 JOIN_HINTS: t.Set[str] = set() 470 471 LAMBDAS = { 472 TokenType.ARROW: lambda self, expressions: self.expression( 473 exp.Lambda, 474 this=self._replace_lambda( 475 self._parse_conjunction(), 476 {node.name for node in expressions}, 477 ), 478 expressions=expressions, 479 ), 480 TokenType.FARROW: lambda self, expressions: self.expression( 481 exp.Kwarg, 482 this=exp.var(expressions[0].name), 483 expression=self._parse_conjunction(), 484 ), 485 } 486 487 COLUMN_OPERATORS = { 488 TokenType.DOT: None, 489 TokenType.DCOLON: lambda self, this, to: self.expression( 490 exp.Cast if self.STRICT_CAST else exp.TryCast, 491 this=this, 492 to=to, 493 ), 494 TokenType.ARROW: lambda self, this, 
path: self.expression( 495 exp.JSONExtract, 496 this=this, 497 expression=path, 498 ), 499 TokenType.DARROW: lambda self, this, path: self.expression( 500 exp.JSONExtractScalar, 501 this=this, 502 expression=path, 503 ), 504 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 505 exp.JSONBExtract, 506 this=this, 507 expression=path, 508 ), 509 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 510 exp.JSONBExtractScalar, 511 this=this, 512 expression=path, 513 ), 514 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 515 exp.JSONBContains, 516 this=this, 517 expression=key, 518 ), 519 } 520 521 EXPRESSION_PARSERS = { 522 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 523 exp.Column: lambda self: self._parse_column(), 524 exp.Condition: lambda self: self._parse_conjunction(), 525 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 526 exp.Expression: lambda self: self._parse_statement(), 527 exp.From: lambda self: self._parse_from(), 528 exp.Group: lambda self: self._parse_group(), 529 exp.Having: lambda self: self._parse_having(), 530 exp.Identifier: lambda self: self._parse_id_var(), 531 exp.Join: lambda self: self._parse_join(), 532 exp.Lambda: lambda self: self._parse_lambda(), 533 exp.Lateral: lambda self: self._parse_lateral(), 534 exp.Limit: lambda self: self._parse_limit(), 535 exp.Offset: lambda self: self._parse_offset(), 536 exp.Order: lambda self: self._parse_order(), 537 exp.Ordered: lambda self: self._parse_ordered(), 538 exp.Properties: lambda self: self._parse_properties(), 539 exp.Qualify: lambda self: self._parse_qualify(), 540 exp.Returning: lambda self: self._parse_returning(), 541 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 542 exp.Table: lambda self: self._parse_table_parts(), 543 exp.TableAlias: lambda self: self._parse_table_alias(), 544 exp.Where: lambda self: self._parse_where(), 545 exp.Window: lambda self: 
self._parse_named_window(), 546 exp.With: lambda self: self._parse_with(), 547 "JOIN_TYPE": lambda self: self._parse_join_parts(), 548 } 549 550 STATEMENT_PARSERS = { 551 TokenType.ALTER: lambda self: self._parse_alter(), 552 TokenType.BEGIN: lambda self: self._parse_transaction(), 553 TokenType.CACHE: lambda self: self._parse_cache(), 554 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 555 TokenType.COMMENT: lambda self: self._parse_comment(), 556 TokenType.CREATE: lambda self: self._parse_create(), 557 TokenType.DELETE: lambda self: self._parse_delete(), 558 TokenType.DESC: lambda self: self._parse_describe(), 559 TokenType.DESCRIBE: lambda self: self._parse_describe(), 560 TokenType.DROP: lambda self: self._parse_drop(), 561 TokenType.INSERT: lambda self: self._parse_insert(), 562 TokenType.KILL: lambda self: self._parse_kill(), 563 TokenType.LOAD: lambda self: self._parse_load(), 564 TokenType.MERGE: lambda self: self._parse_merge(), 565 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 566 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 567 TokenType.REFRESH: lambda self: self._parse_refresh(), 568 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 569 TokenType.SET: lambda self: self._parse_set(), 570 TokenType.UNCACHE: lambda self: self._parse_uncache(), 571 TokenType.UPDATE: lambda self: self._parse_update(), 572 TokenType.USE: lambda self: self.expression( 573 exp.Use, 574 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 575 and exp.var(self._prev.text), 576 this=self._parse_table(schema=False), 577 ), 578 } 579 580 UNARY_PARSERS = { 581 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 582 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 583 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 584 TokenType.DASH: lambda self: self.expression(exp.Neg, 
this=self._parse_unary()), 585 } 586 587 PRIMARY_PARSERS = { 588 TokenType.STRING: lambda self, token: self.expression( 589 exp.Literal, this=token.text, is_string=True 590 ), 591 TokenType.NUMBER: lambda self, token: self.expression( 592 exp.Literal, this=token.text, is_string=False 593 ), 594 TokenType.STAR: lambda self, _: self.expression( 595 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 596 ), 597 TokenType.NULL: lambda self, _: self.expression(exp.Null), 598 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 599 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 600 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 601 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 602 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 603 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 604 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 605 exp.National, this=token.text 606 ), 607 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 608 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 609 exp.RawString, this=token.text 610 ), 611 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 612 } 613 614 PLACEHOLDER_PARSERS = { 615 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 616 TokenType.PARAMETER: lambda self: self._parse_parameter(), 617 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 618 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 619 else None, 620 } 621 622 RANGE_PARSERS = { 623 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 624 TokenType.GLOB: binary_range_parser(exp.Glob), 625 TokenType.ILIKE: binary_range_parser(exp.ILike), 626 TokenType.IN: lambda 
self, this: self._parse_in(this), 627 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 628 TokenType.IS: lambda self, this: self._parse_is(this), 629 TokenType.LIKE: binary_range_parser(exp.Like), 630 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 631 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 632 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 633 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 634 } 635 636 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 637 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 638 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 639 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 640 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 641 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 642 "CHECKSUM": lambda self: self._parse_checksum(), 643 "CLUSTER BY": lambda self: self._parse_cluster(), 644 "CLUSTERED": lambda self: self._parse_clustered_by(), 645 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 646 exp.CollateProperty, **kwargs 647 ), 648 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 649 "COPY": lambda self: self._parse_copy_property(), 650 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 651 "DEFINER": lambda self: self._parse_definer(), 652 "DETERMINISTIC": lambda self: self.expression( 653 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 654 ), 655 "DISTKEY": lambda self: self._parse_distkey(), 656 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 657 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 658 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 659 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 660 "FALLBACK": lambda self, 
**kwargs: self._parse_fallback(**kwargs), 661 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 662 "FREESPACE": lambda self: self._parse_freespace(), 663 "HEAP": lambda self: self.expression(exp.HeapProperty), 664 "IMMUTABLE": lambda self: self.expression( 665 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 666 ), 667 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 668 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 669 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 670 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 671 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 672 "LIKE": lambda self: self._parse_create_like(), 673 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 674 "LOCK": lambda self: self._parse_locking(), 675 "LOCKING": lambda self: self._parse_locking(), 676 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 677 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 678 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 679 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 680 "NO": lambda self: self._parse_no_property(), 681 "ON": lambda self: self._parse_on_property(), 682 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 683 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 684 "PARTITION": lambda self: self._parse_partitioned_of(), 685 "PARTITION BY": lambda self: self._parse_partitioned_by(), 686 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 687 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 688 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 689 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 690 "REMOTE": lambda self: self._parse_remote_with_connection(), 
691 "RETURNS": lambda self: self._parse_returns(), 692 "ROW": lambda self: self._parse_row(), 693 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 694 "SAMPLE": lambda self: self.expression( 695 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 696 ), 697 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 698 "SETTINGS": lambda self: self.expression( 699 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 700 ), 701 "SORTKEY": lambda self: self._parse_sortkey(), 702 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 703 "STABLE": lambda self: self.expression( 704 exp.StabilityProperty, this=exp.Literal.string("STABLE") 705 ), 706 "STORED": lambda self: self._parse_stored(), 707 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 708 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 709 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 710 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 711 "TO": lambda self: self._parse_to_table(), 712 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 713 "TRANSFORM": lambda self: self.expression( 714 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 715 ), 716 "TTL": lambda self: self._parse_ttl(), 717 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 718 "VOLATILE": lambda self: self._parse_volatile_property(), 719 "WITH": lambda self: self._parse_with_property(), 720 } 721 722 CONSTRAINT_PARSERS = { 723 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 724 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 725 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 726 "CHARACTER SET": lambda self: self.expression( 727 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 728 ), 729 "CHECK": 
lambda self: self.expression( 730 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 731 ), 732 "COLLATE": lambda self: self.expression( 733 exp.CollateColumnConstraint, this=self._parse_var() 734 ), 735 "COMMENT": lambda self: self.expression( 736 exp.CommentColumnConstraint, this=self._parse_string() 737 ), 738 "COMPRESS": lambda self: self._parse_compress(), 739 "CLUSTERED": lambda self: self.expression( 740 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 741 ), 742 "NONCLUSTERED": lambda self: self.expression( 743 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 744 ), 745 "DEFAULT": lambda self: self.expression( 746 exp.DefaultColumnConstraint, this=self._parse_bitwise() 747 ), 748 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 749 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 750 "FORMAT": lambda self: self.expression( 751 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 752 ), 753 "GENERATED": lambda self: self._parse_generated_as_identity(), 754 "IDENTITY": lambda self: self._parse_auto_increment(), 755 "INLINE": lambda self: self._parse_inline(), 756 "LIKE": lambda self: self._parse_create_like(), 757 "NOT": lambda self: self._parse_not_constraint(), 758 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 759 "ON": lambda self: ( 760 self._match(TokenType.UPDATE) 761 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 762 ) 763 or self.expression(exp.OnProperty, this=self._parse_id_var()), 764 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 765 "PERIOD": lambda self: self._parse_period_for_system_time(), 766 "PRIMARY KEY": lambda self: self._parse_primary_key(), 767 "REFERENCES": lambda self: self._parse_references(match=False), 768 "TITLE": lambda self: self.expression( 769 
exp.TitleColumnConstraint, this=self._parse_var_or_string() 770 ), 771 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 772 "UNIQUE": lambda self: self._parse_unique(), 773 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 774 "WITH": lambda self: self.expression( 775 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 776 ), 777 } 778 779 ALTER_PARSERS = { 780 "ADD": lambda self: self._parse_alter_table_add(), 781 "ALTER": lambda self: self._parse_alter_table_alter(), 782 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 783 "DROP": lambda self: self._parse_alter_table_drop(), 784 "RENAME": lambda self: self._parse_alter_table_rename(), 785 } 786 787 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 788 789 NO_PAREN_FUNCTION_PARSERS = { 790 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 791 "CASE": lambda self: self._parse_case(), 792 "IF": lambda self: self._parse_if(), 793 "NEXT": lambda self: self._parse_next_value_for(), 794 } 795 796 INVALID_FUNC_NAME_TOKENS = { 797 TokenType.IDENTIFIER, 798 TokenType.STRING, 799 } 800 801 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 802 803 FUNCTION_PARSERS = { 804 "ANY_VALUE": lambda self: self._parse_any_value(), 805 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 806 "CONCAT": lambda self: self._parse_concat(), 807 "CONCAT_WS": lambda self: self._parse_concat_ws(), 808 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 809 "DECODE": lambda self: self._parse_decode(), 810 "EXTRACT": lambda self: self._parse_extract(), 811 "JSON_OBJECT": lambda self: self._parse_json_object(), 812 "JSON_TABLE": lambda self: self._parse_json_table(), 813 "LOG": lambda self: self._parse_logarithm(), 814 "MATCH": lambda self: self._parse_match_against(), 815 "OPENJSON": lambda self: self._parse_open_json(), 816 "POSITION": lambda self: 
self._parse_position(), 817 "PREDICT": lambda self: self._parse_predict(), 818 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 819 "STRING_AGG": lambda self: self._parse_string_agg(), 820 "SUBSTRING": lambda self: self._parse_substring(), 821 "TRIM": lambda self: self._parse_trim(), 822 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 823 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 824 } 825 826 QUERY_MODIFIER_PARSERS = { 827 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 828 TokenType.WHERE: lambda self: ("where", self._parse_where()), 829 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 830 TokenType.HAVING: lambda self: ("having", self._parse_having()), 831 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 832 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 833 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 834 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 835 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 836 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 837 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 838 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 839 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 840 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 841 TokenType.CLUSTER_BY: lambda self: ( 842 "cluster", 843 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 844 ), 845 TokenType.DISTRIBUTE_BY: lambda self: ( 846 "distribute", 847 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 848 ), 849 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 850 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 851 TokenType.START_WITH: lambda self: ("connect", 
self._parse_connect()), 852 } 853 854 SET_PARSERS = { 855 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 856 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 857 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 858 "TRANSACTION": lambda self: self._parse_set_transaction(), 859 } 860 861 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 862 863 TYPE_LITERAL_PARSERS = { 864 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 865 } 866 867 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 868 869 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 870 871 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 872 873 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 874 TRANSACTION_CHARACTERISTICS = { 875 "ISOLATION LEVEL REPEATABLE READ", 876 "ISOLATION LEVEL READ COMMITTED", 877 "ISOLATION LEVEL READ UNCOMMITTED", 878 "ISOLATION LEVEL SERIALIZABLE", 879 "READ WRITE", 880 "READ ONLY", 881 } 882 883 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 884 885 CLONE_KEYWORDS = {"CLONE", "COPY"} 886 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 887 888 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 889 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 890 891 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 892 893 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 894 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 895 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 896 897 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 898 899 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 900 901 DISTINCT_TOKENS = {TokenType.DISTINCT} 902 903 NULL_TOKENS = {TokenType.NULL} 904 905 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 906 907 STRICT_CAST = True 908 909 # A NULL arg in CONCAT 
    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    # Dialect-specific PIVOT parsing behavior — semantics defined by the
    # dialects that override these (can't tell more from here).
    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # LOG function parsing behavior; presumably consumed by
    # _parse_logarithm (registered for "LOG" in FUNCTION_PARSERS) — confirm.
    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether the behavior of a / b depends on the types of a and b.
    # False means a / b is always float division.
    # True means a / b is integer division if both a and b are integers.
    TYPED_DIVISION = False

    # False means 1 / 0 throws an error.
    # True means 1 / 0 returns null.
    SAFE_DIVISION = False

    # Restrict instances to exactly the attributes assigned in
    # __init__/reset (saves memory; there can be many Parser instances).
    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled — presumably overridden when dialect-specific Parser
    # subclasses are generated; confirm against sqlglot.dialects.
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}  # filled in by the _Parser metaclass
    SET_TRIE: t.Dict = {}  # filled in by the _Parser metaclass
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        """Initialize parser configuration and reset all parse state.

        Args:
            error_level: The desired error level (defaults to ErrorLevel.IMMEDIATE).
            error_message_context: Number of characters of query context shown
                in error messages.
            max_errors: Maximum number of error messages included in a raised
                ParseError (only relevant for ErrorLevel.RAISE).
        """
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Clear all per-parse state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag each failure with the type we were trying, so the
                # aggregated error is actionable.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # Every candidate type failed — raise an aggregate, chained to the
        # last individual failure.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Split tokens on semicolons and run ``parse_method`` over each chunk.

        Returns one (possibly None) expression per statement chunk; errors are
        collected and reported according to ``error_level`` via check_errors.
        """
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon does not open an empty final chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # Position the cursor before the first token of the chunk.
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Anything left unconsumed means the statement wasn't fully parsed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or
raises it, depending on the chosen 1095 error level setting. 1096 """ 1097 token = token or self._curr or self._prev or Token.string("") 1098 start = token.start 1099 end = token.end + 1 1100 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1101 highlight = self.sql[start:end] 1102 end_context = self.sql[end : end + self.error_message_context] 1103 1104 error = ParseError.new( 1105 f"{message}. Line {token.line}, Col: {token.col}.\n" 1106 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1107 description=message, 1108 line=token.line, 1109 col=token.col, 1110 start_context=start_context, 1111 highlight=highlight, 1112 end_context=end_context, 1113 ) 1114 1115 if self.error_level == ErrorLevel.IMMEDIATE: 1116 raise error 1117 1118 self.errors.append(error) 1119 1120 def expression( 1121 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1122 ) -> E: 1123 """ 1124 Creates a new, validated Expression. 1125 1126 Args: 1127 exp_class: The expression class to instantiate. 1128 comments: An optional list of comments to attach to the expression. 1129 kwargs: The arguments to set for the expression along with their respective values. 1130 1131 Returns: 1132 The target expression. 1133 """ 1134 instance = exp_class(**kwargs) 1135 instance.add_comments(comments) if comments else self._add_comments(instance) 1136 return self.validate_expression(instance) 1137 1138 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1139 if expression and self._prev_comments: 1140 expression.add_comments(self._prev_comments) 1141 self._prev_comments = None 1142 1143 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1144 """ 1145 Validates an Expression, making sure that all its mandatory arguments are set. 1146 1147 Args: 1148 expression: The expression to validate. 1149 args: An optional list of items that was used to instantiate the expression, if it's a Func. 

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL text spanned by the two tokens (inclusive)."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Moves the cursor forward `times` tokens, refreshing _curr/_next/_prev/_prev_comments."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the cursor back (or forward) to the absolute token `index`."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the previous keyword and the rest of the statement as an opaque Command.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a COMMENT ON <kind> <target> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause into an exp.MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses one statement: dispatches on keyword, else falls back to expression/SELECT."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown object kind: degrade gracefully to an opaque Command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS; returns a truthy value only if the full sequence matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE statement; falls back to exp.Command when it can't be interpreted."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at the various syntactic locations into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # NOTE: dict-literal evaluation order is load-bearing -- each _match* consumes tokens.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or
(self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1452 "after": self._match_text_seq("AFTER"), 1453 "minimum": self._match_texts(("MIN", "MINIMUM")), 1454 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1455 } 1456 1457 if self._match_texts(self.PROPERTY_PARSERS): 1458 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1459 try: 1460 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1461 except TypeError: 1462 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1463 1464 return None 1465 1466 def _parse_property(self) -> t.Optional[exp.Expression]: 1467 if self._match_texts(self.PROPERTY_PARSERS): 1468 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1469 1470 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1471 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1472 1473 if self._match_text_seq("COMPOUND", "SORTKEY"): 1474 return self._parse_sortkey(compound=True) 1475 1476 if self._match_text_seq("SQL", "SECURITY"): 1477 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1478 1479 index = self._index 1480 key = self._parse_column() 1481 1482 if not self._match(TokenType.EQ): 1483 self._retreat(index) 1484 return None 1485 1486 return self.expression( 1487 exp.Property, 1488 this=key.to_dot() if isinstance(key, exp.Column) else key, 1489 value=self._parse_column() or self._parse_var(any_token=True), 1490 ) 1491 1492 def _parse_stored(self) -> exp.FileFormatProperty: 1493 self._match(TokenType.ALIAS) 1494 1495 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1496 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1497 1498 return self.expression( 1499 exp.FileFormatProperty, 1500 this=self.expression( 1501 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1502 ) 1503 if input_format or output_format 1504 else 
self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1505 ) 1506 1507 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1508 self._match(TokenType.EQ) 1509 self._match(TokenType.ALIAS) 1510 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1511 1512 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1513 properties = [] 1514 while True: 1515 if before: 1516 prop = self._parse_property_before() 1517 else: 1518 prop = self._parse_property() 1519 1520 if not prop: 1521 break 1522 for p in ensure_list(prop): 1523 properties.append(p) 1524 1525 if properties: 1526 return self.expression(exp.Properties, expressions=properties) 1527 1528 return None 1529 1530 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1531 return self.expression( 1532 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1533 ) 1534 1535 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1536 if self._index >= 2: 1537 pre_volatile_token = self._tokens[self._index - 2] 1538 else: 1539 pre_volatile_token = None 1540 1541 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1542 return exp.VolatileProperty() 1543 1544 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1545 1546 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1547 self._match_pair(TokenType.EQ, TokenType.ON) 1548 1549 prop = self.expression(exp.WithSystemVersioningProperty) 1550 if self._match(TokenType.L_PAREN): 1551 self._match_text_seq("HISTORY_TABLE", "=") 1552 prop.set("this", self._parse_table_parts()) 1553 1554 if self._match(TokenType.COMMA): 1555 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1556 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1557 1558 self._match_r_paren() 1559 1560 return prop 1561 1562 def 
_parse_with_property( 1563 self, 1564 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1565 if self._match(TokenType.L_PAREN, advance=False): 1566 return self._parse_wrapped_csv(self._parse_property) 1567 1568 if self._match_text_seq("JOURNAL"): 1569 return self._parse_withjournaltable() 1570 1571 if self._match_text_seq("DATA"): 1572 return self._parse_withdata(no=False) 1573 elif self._match_text_seq("NO", "DATA"): 1574 return self._parse_withdata(no=True) 1575 1576 if not self._next: 1577 return None 1578 1579 return self._parse_withisolatedloading() 1580 1581 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1582 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1583 self._match(TokenType.EQ) 1584 1585 user = self._parse_id_var() 1586 self._match(TokenType.PARAMETER) 1587 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1588 1589 if not user or not host: 1590 return None 1591 1592 return exp.DefinerProperty(this=f"{user}@{host}") 1593 1594 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1595 self._match(TokenType.TABLE) 1596 self._match(TokenType.EQ) 1597 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1598 1599 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1600 return self.expression(exp.LogProperty, no=no) 1601 1602 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1603 return self.expression(exp.JournalProperty, **kwargs) 1604 1605 def _parse_checksum(self) -> exp.ChecksumProperty: 1606 self._match(TokenType.EQ) 1607 1608 on = None 1609 if self._match(TokenType.ON): 1610 on = True 1611 elif self._match_text_seq("OFF"): 1612 on = False 1613 1614 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1615 1616 def _parse_cluster(self) -> exp.Cluster: 1617 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1618 1619 def _parse_clustered_by(self) -> 
exp.ClusteredByProperty: 1620 self._match_text_seq("BY") 1621 1622 self._match_l_paren() 1623 expressions = self._parse_csv(self._parse_column) 1624 self._match_r_paren() 1625 1626 if self._match_text_seq("SORTED", "BY"): 1627 self._match_l_paren() 1628 sorted_by = self._parse_csv(self._parse_ordered) 1629 self._match_r_paren() 1630 else: 1631 sorted_by = None 1632 1633 self._match(TokenType.INTO) 1634 buckets = self._parse_number() 1635 self._match_text_seq("BUCKETS") 1636 1637 return self.expression( 1638 exp.ClusteredByProperty, 1639 expressions=expressions, 1640 sorted_by=sorted_by, 1641 buckets=buckets, 1642 ) 1643 1644 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1645 if not self._match_text_seq("GRANTS"): 1646 self._retreat(self._index - 1) 1647 return None 1648 1649 return self.expression(exp.CopyGrantsProperty) 1650 1651 def _parse_freespace(self) -> exp.FreespaceProperty: 1652 self._match(TokenType.EQ) 1653 return self.expression( 1654 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1655 ) 1656 1657 def _parse_mergeblockratio( 1658 self, no: bool = False, default: bool = False 1659 ) -> exp.MergeBlockRatioProperty: 1660 if self._match(TokenType.EQ): 1661 return self.expression( 1662 exp.MergeBlockRatioProperty, 1663 this=self._parse_number(), 1664 percent=self._match(TokenType.PERCENT), 1665 ) 1666 1667 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1668 1669 def _parse_datablocksize( 1670 self, 1671 default: t.Optional[bool] = None, 1672 minimum: t.Optional[bool] = None, 1673 maximum: t.Optional[bool] = None, 1674 ) -> exp.DataBlocksizeProperty: 1675 self._match(TokenType.EQ) 1676 size = self._parse_number() 1677 1678 units = None 1679 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1680 units = self._prev.text 1681 1682 return self.expression( 1683 exp.DataBlocksizeProperty, 1684 size=size, 1685 units=units, 1686 default=default, 1687 minimum=minimum, 
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parses a LOCKING property: kind, optional target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parses IN (...), FROM (...) TO (...), or WITH (MODULUS n, REMAINDER m) bounds."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # statistics stays None when no AND [NO] STATISTICS clause is present.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parses LIKE <table> [INCLUDING|EXCLUDING <option>]... in CREATE TABLE."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parses a RETURNS clause: either RETURNS TABLE [<schema>] or a scalar type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        """Parses an INSERT statement, including INSERT ... DIRECTORY and OR <alternative>."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parses ON CONFLICT / ON DUPLICATE KEY conflict-resolution clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]; None if RETURNING is absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a ROW FORMAT clause after ROW was consumed; None if FORMAT is absent."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive-style ROW FORMAT SERDE / ROW FORMAT DELIMITED property.

        When `match_row` is True, the leading ROW FORMAT token pair must be
        present; otherwise it is assumed to have been consumed already.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Every sub-clause below is optional; each contributes one argument.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            # ESCAPED BY is only valid after FIELDS TERMINATED BY.
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; anything else after
        LOAD falls back to an opaque Command.
        """
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement, including multi-table targets and USING."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after the WHERE clause.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
    def _parse_update(self) -> exp.Update:
        """Parse UPDATE <table> SET <assignments> [FROM ...] [WHERE ...] ...."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # A single ('key' = 'value') pair.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<exprs>); None if PARTITION is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row, parenthesized or bare, into a Tuple."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: optional CTEs, SELECT, parenthesized
        subqueries (when `nested`/`table`), VALUES, and leading FROM.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a bare table inside parentheses.
            parse_subquery_alias: whether a parsed subquery may take an alias.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            # A bare leading FROM x becomes SELECT * FROM x.
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH [RECURSIVE] clause with one or more CTEs; None if absent."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Stop when neither "," nor another WITH follows; a comma may be
            # followed by a redundant WITH, which is consumed in the else arm.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> [(<cols>)] AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(<column aliases>)]; None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If nothing parsed inside the parens, back out of consuming "(".
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery, attaching pivots and (optionally) an alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, laterals, WHERE/GROUP/.../LIMIT)
        to a modifiable node; non-modifiable input is returned unchanged.
        """
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT x, y style: hoist the offset into its own node.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this
exp.Offset(expression=offset)) 2359 continue 2360 break 2361 return this 2362 2363 def _parse_hint(self) -> t.Optional[exp.Hint]: 2364 if self._match(TokenType.HINT): 2365 hints = [] 2366 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2367 hints.extend(hint) 2368 2369 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2370 self.raise_error("Expected */ after HINT") 2371 2372 return self.expression(exp.Hint, expressions=hints) 2373 2374 return None 2375 2376 def _parse_into(self) -> t.Optional[exp.Into]: 2377 if not self._match(TokenType.INTO): 2378 return None 2379 2380 temp = self._match(TokenType.TEMPORARY) 2381 unlogged = self._match_text_seq("UNLOGGED") 2382 self._match(TokenType.TABLE) 2383 2384 return self.expression( 2385 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2386 ) 2387 2388 def _parse_from( 2389 self, joins: bool = False, skip_from_token: bool = False 2390 ) -> t.Optional[exp.From]: 2391 if not skip_from_token and not self._match(TokenType.FROM): 2392 return None 2393 2394 return self.expression( 2395 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2396 ) 2397 2398 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2399 if not self._match(TokenType.MATCH_RECOGNIZE): 2400 return None 2401 2402 self._match_l_paren() 2403 2404 partition = self._parse_partition_by() 2405 order = self._parse_order() 2406 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2407 2408 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2409 rows = exp.var("ONE ROW PER MATCH") 2410 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2411 text = "ALL ROWS PER MATCH" 2412 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2413 text += f" SHOW EMPTY MATCHES" 2414 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2415 text += f" OMIT EMPTY MATCHES" 2416 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2417 text += f" 
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY; None when none applies.

        OUTER APPLY is modeled as an outer LATERAL, CROSS APPLY as a non-outer one.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: try an UNNEST, a function call, or a bare identifier.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            # Allow dotted references, e.g. schema.func(...).
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            # LATERAL VIEW <expr> <table alias> [AS col, ...]
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume optional join method, side and kind tokens, in that order."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join clause (including comma joins and APPLY); None if absent."""
        # A bare comma is an implicit cross join.
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword: roll back whatever join parts were consumed.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Support nested joins like "a JOIN b JOIN c ON ... ON ...":
            # the inner join's condition may follow the outer one.
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an index column expression with an optional operator class suffix."""
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        With `index` given (name already parsed), expects ON [TABLE] <table>;
        otherwise parses [UNIQUE] [PRIMARY] [AMP] INDEX <name>.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )
    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table-level hints: T-SQL WITH (...) or MySQL index hints; None if none."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted-name component (function, identifier, string or placeholder).

        Function calls are disallowed when parsing a schema target.
        """
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )
    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly-qualified table name: [catalog.][db.]table[.more...]."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift previously parsed parts one qualification level up.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, values, subquery or named table,
        with optional version, alias, hints, pivots, sample and joins.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place TABLESAMPLE before the alias, some after.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node becomes the parent of the table it samples.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal/versioned table clause (FOR TIMESTAMP/VERSION ...).

        Supported forms: FROM ... TO ..., BETWEEN ... AND ..., CONTAINED IN (...),
        ALL, and AS OF <expr>. Returns None if no snapshot token is present.
        """
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            # Default/fallback form: AS OF <expr> ("AS OF" tokens optional here).
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
expression=expression, kind=kind) 2808 2809 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2810 if not self._match(TokenType.UNNEST): 2811 return None 2812 2813 expressions = self._parse_wrapped_csv(self._parse_equality) 2814 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2815 2816 alias = self._parse_table_alias() if with_alias else None 2817 2818 if alias: 2819 if self.UNNEST_COLUMN_ONLY: 2820 if alias.args.get("columns"): 2821 self.raise_error("Unexpected extra column alias in unnest.") 2822 2823 alias.set("columns", [alias.this]) 2824 alias.set("this", None) 2825 2826 columns = alias.args.get("columns") or [] 2827 if offset and len(expressions) < len(columns): 2828 offset = columns.pop() 2829 2830 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2831 self._match(TokenType.ALIAS) 2832 offset = self._parse_id_var( 2833 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2834 ) or exp.to_identifier("offset") 2835 2836 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2837 2838 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2839 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2840 if not is_derived and not self._match(TokenType.VALUES): 2841 return None 2842 2843 expressions = self._parse_csv(self._parse_value) 2844 alias = self._parse_table_alias() 2845 2846 if is_derived: 2847 self._match_r_paren() 2848 2849 return self.expression( 2850 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2851 ) 2852 2853 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2854 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2855 as_modifier and self._match_text_seq("USING", "SAMPLE") 2856 ): 2857 return None 2858 2859 bucket_numerator = None 2860 bucket_denominator = None 2861 bucket_field = None 2862 percent = None 2863 rows = None 2864 size = None 2865 seed = None 
2866 2867 kind = ( 2868 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2869 ) 2870 method = self._parse_var(tokens=(TokenType.ROW,)) 2871 2872 matched_l_paren = self._match(TokenType.L_PAREN) 2873 2874 if self.TABLESAMPLE_CSV: 2875 num = None 2876 expressions = self._parse_csv(self._parse_primary) 2877 else: 2878 expressions = None 2879 num = ( 2880 self._parse_factor() 2881 if self._match(TokenType.NUMBER, advance=False) 2882 else self._parse_primary() 2883 ) 2884 2885 if self._match_text_seq("BUCKET"): 2886 bucket_numerator = self._parse_number() 2887 self._match_text_seq("OUT", "OF") 2888 bucket_denominator = bucket_denominator = self._parse_number() 2889 self._match(TokenType.ON) 2890 bucket_field = self._parse_field() 2891 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2892 percent = num 2893 elif self._match(TokenType.ROWS): 2894 rows = num 2895 elif num: 2896 size = num 2897 2898 if matched_l_paren: 2899 self._match_r_paren() 2900 2901 if self._match(TokenType.L_PAREN): 2902 method = self._parse_var() 2903 seed = self._match(TokenType.COMMA) and self._parse_number() 2904 self._match_r_paren() 2905 elif self._match_texts(("SEED", "REPEATABLE")): 2906 seed = self._parse_wrapped(self._parse_number) 2907 2908 return self.expression( 2909 exp.TableSample, 2910 expressions=expressions, 2911 method=method, 2912 bucket_numerator=bucket_numerator, 2913 bucket_denominator=bucket_denominator, 2914 bucket_field=bucket_field, 2915 percent=percent, 2916 rows=rows, 2917 size=size, 2918 seed=seed, 2919 kind=kind, 2920 ) 2921 2922 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2923 return list(iter(self._parse_pivot, None)) or None 2924 2925 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2926 return list(iter(self._parse_join, None)) or None 2927 2928 # https://duckdb.org/docs/sql/statements/pivot 2929 def _parse_simplified_pivot(self) -> exp.Pivot: 2930 def _parse_on() -> t.Optional[exp.Expression]: 2931 
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard PIVOT/UNPIVOT clause; None if one doesn't follow.

        Also synthesizes the output column identifiers for PIVOT, based on the
        aggregation aliases and the IN-list field values.
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause: rewind past the PIVOT/UNPIVOT token.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # Only the last pivot in a chain may carry an alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (may be empty strings)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; None if the WHERE token is absent (unless skipped)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with optional ALL, GROUPING SETS, [WITH] ROLLUP/CUBE and
        WITH TOTALS; None if GROUP BY is absent (unless skipped).
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # "WITH ROLLUP" stores True; "ROLLUP (...)" stores the column list.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # Lone WITH belonged to something else; rewind before it.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore
self._parse_wrapped_csv(self._parse_column) 3058 elements["cube"].extend(ensure_list(cube)) 3059 3060 if self._match_text_seq("TOTALS"): 3061 totals = True 3062 elements["totals"] = True # type: ignore 3063 3064 if not (grouping_sets or rollup or cube or totals): 3065 if with_: 3066 self._retreat(index) 3067 break 3068 3069 return self.expression(exp.Group, **elements) # type: ignore 3070 3071 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3072 if not self._match(TokenType.GROUPING_SETS): 3073 return None 3074 3075 return self._parse_wrapped_csv(self._parse_grouping_set) 3076 3077 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3078 if self._match(TokenType.L_PAREN): 3079 grouping_set = self._parse_csv(self._parse_column) 3080 self._match_r_paren() 3081 return self.expression(exp.Tuple, expressions=grouping_set) 3082 3083 return self._parse_column() 3084 3085 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3086 if not skip_having_token and not self._match(TokenType.HAVING): 3087 return None 3088 return self.expression(exp.Having, this=self._parse_conjunction()) 3089 3090 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3091 if not self._match(TokenType.QUALIFY): 3092 return None 3093 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3094 3095 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3096 if skip_start_token: 3097 start = None 3098 elif self._match(TokenType.START_WITH): 3099 start = self._parse_conjunction() 3100 else: 3101 return None 3102 3103 self._match(TokenType.CONNECT_BY) 3104 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3105 exp.Prior, this=self._parse_bitwise() 3106 ) 3107 connect = self._parse_conjunction() 3108 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3109 3110 if not start and self._match(TokenType.START_WITH): 3111 start = self._parse_conjunction() 3112 3113 return 
self.expression(exp.Connect, start=start, connect=connect) 3114 3115 def _parse_order( 3116 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3117 ) -> t.Optional[exp.Expression]: 3118 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3119 return this 3120 3121 return self.expression( 3122 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3123 ) 3124 3125 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3126 if not self._match(token): 3127 return None 3128 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3129 3130 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3131 this = parse_method() if parse_method else self._parse_conjunction() 3132 3133 asc = self._match(TokenType.ASC) 3134 desc = self._match(TokenType.DESC) or (asc and False) 3135 3136 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3137 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3138 3139 nulls_first = is_nulls_first or False 3140 explicitly_null_ordered = is_nulls_first or is_nulls_last 3141 3142 if ( 3143 not explicitly_null_ordered 3144 and ( 3145 (not desc and self.NULL_ORDERING == "nulls_are_small") 3146 or (desc and self.NULL_ORDERING != "nulls_are_small") 3147 ) 3148 and self.NULL_ORDERING != "nulls_are_last" 3149 ): 3150 nulls_first = True 3151 3152 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3153 3154 def _parse_limit( 3155 self, this: t.Optional[exp.Expression] = None, top: bool = False 3156 ) -> t.Optional[exp.Expression]: 3157 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3158 comments = self._prev_comments 3159 if top: 3160 limit_paren = self._match(TokenType.L_PAREN) 3161 expression = self._parse_number() 3162 3163 if limit_paren: 3164 self._match_r_paren() 3165 else: 3166 expression = self._parse_term() 3167 3168 if self._match(TokenType.COMMA): 3169 offset 
= expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            # FETCH {FIRST|NEXT} [n] [PERCENT] {ROW|ROWS} [ONLY | WITH TIES]
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET <n> [ROW|ROWS]; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse zero or more locking clauses: FOR UPDATE / FOR SHARE /
        LOCK IN SHARE MODE, each with optional OF <tables> and wait options."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is three-valued: True = NOWAIT, False = SKIP LOCKED,
            # an expression = WAIT <n>, None = unspecified.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT operations chained onto `this`.

        Recurses on the right-hand side, so chains associate to the right.
        """
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            comments=self._prev.comments,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full expression, including an optional trailing alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        # AND/OR layer of the precedence climb.
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        # =, <>, != layer.
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        # <, <=, >, >= layer.
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS, ISNULL/NOTNULL, ...)
        on top of a bitwise expression, handling an optional leading NOT."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            # NOTNULL is modeled as NOT (x IS NULL).
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE.

        The IS token itself was consumed by the caller; on failure the parser
        is rewound to just before it and None is returned.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM == null-safe equality, IS DISTINCT FROM == null-safe inequality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: UNNEST(...), a parenthesized
        subquery or expression list, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery is stored under "query", plain values under "expressions".
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse `<low> AND <high>` following BETWEEN."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE clause if one follows; otherwise return it unchanged."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing toward INTERVAL '<n>' <unit>.

        Rewinds and returns None when INTERVAL isn't followed by a usable value.
        """
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            # e.g. INTERVAL '5 day' — split the string into value and unit.
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g.
                    # a "window side"), so give the token back.
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse left-associative bitwise operators, `??` coalescing, and the
        `<<` / `>>` shifts (tokenized as LT LT / GT GT pairs)."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                # a ?? b  ->  COALESCE(a, b)
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        # +, - (and other TERM operators) layer.
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse *, /, etc.; tag Div nodes with the dialect's division semantics."""
        if self.EXPONENT:
            factor = self._parse_tokens(self._parse_exponent, self.FACTOR)
        else:
            factor = self._parse_tokens(self._parse_unary, self.FACTOR)
        if isinstance(factor, exp.Div):
            factor.args["typed"] = self.TYPED_DIVISION
            factor.args["safe"] = self.SAFE_DIVISION
        return factor

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        # Exponentiation layer, only reachable when the dialect defines EXPONENT tokens.
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse prefix unary operators, then a typed/AT TIME ZONE expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast-style `<type> <literal>` construct, or a column."""
        interval =
parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' — dialects may register a dedicated
                # literal parser per type.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by something that isn't a literal:
                # it was actually a column reference, so rewind and reparse.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one type parameter, e.g. the `10` in DECIMAL(10, 2)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type; None on failure.

        Args:
            check_func: When True, a parameterized type followed by a string
                literal is rejected (it's likely a function call instead).
            schema: Propagated into nested type parsing (schema context).
            allow_identifiers: Allow a plain identifier to be re-tokenized as a
                type name (and user-defined types when the dialect supports them).
        """
        index = self._index

        # Teradata SYSUDTLIB.<type> prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier to see if it actually names a type.
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            # Parenthesized parameters: struct fields, nested types, enum
            # values, or plain size parameters depending on the type token.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Could still be a function call, e.g. VARCHAR(...) vs. a UDF named VARCHAR.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax, e.g. ARRAY<INT> / STRUCT<a INT, b TEXT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # e.g. INTERVAL YEAR TO MONTH
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # `TYPE(...)` not followed by a string literal: treat as a
                # function call, not a type.
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type into (possibly nested) arrays.
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one struct member: `<name>[:] <type>` (interval parsing disabled
        so member names like `interval` don't misparse)."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE if present; otherwise return it unchanged."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def
_parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, then any trailing column operators."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse chained column operators: brackets, `::` casts, dots, etc.,
        folding each one into `this` left-to-right."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast — the operand must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the column's parts one level: what was parsed as the
                # column becomes the table, table becomes db, db becomes catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, an implicitly concatenated
        string sequence, a leading-dot number, or a parenthesized
        expression/subquery/tuple. Returns None when nothing matches."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # `.5` style literal.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def
_parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, including the ODBC-style `{fn <function>}` wrapper."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation.

        Resolution order: dialect no-paren parsers, no-paren builtins (e.g.
        CURRENT_DATE), dialect function parsers, subquery predicates
        (EXISTS/ANY/...), then known functions with validation; anything else
        becomes exp.Anonymous.

        Args:
            functions: Override for the name -> builder mapping (defaults to
                self.FUNCTIONS).
            anonymous: Force an exp.Anonymous node, skipping named builders.
            optional_parens: Allow function names that take no parentheses.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Preserve the original casing of the function name.
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a function definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)
    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression `(x, y) -> ...` / `x -> ...`; when no lambda
        arrow follows, rewind and parse a DISTINCT list or a regular expression
        with optional ORDER BY / LIMIT (as used in aggregate arguments)."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as an ordinary argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint definitions) attached
        to `this`; returns `this` unchanged when no schema follows."""
        index = self._index

        if not self.errors:
            # Speculatively try a nested SELECT first; roll back both the
            # cursor and any errors the attempt produced.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse one schema field definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type and constraints of a column definition rooted at `this`.

        Returns `this` unchanged when neither a type nor constraints follow.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Computed column: <name> AS <expr> [PERSISTED] [NOT NULL]
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, with optional (start, increment) or
        START ... INCREMENT ... arguments."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return
self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY(...) | ROW ... | <expr>}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ALWAYS AS ROW {START | END} [HIDDEN] (system versioning).
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>) — a computed expression, not IDENTITY.
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric arguments: IDENTITY(start, increment).
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] <n> constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the tail of a NOT ... constraint (NULL / CASESPECIFIC / FOR REPLICATION)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint; None when none follows."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; unnamed ones fall back to the
        schema-level unnamed-constraint set."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint may chain several constraint bodies.
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint drawn from `constraints`
        (defaults to CONSTRAINT_PARSERS); None when nothing matches."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return
self.CONSTRAINT_PARSERS[constraint](self) 4073 4074 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4075 self._match_text_seq("KEY") 4076 return self.expression( 4077 exp.UniqueColumnConstraint, 4078 this=self._parse_schema(self._parse_id_var(any_token=False)), 4079 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4080 ) 4081 4082 def _parse_key_constraint_options(self) -> t.List[str]: 4083 options = [] 4084 while True: 4085 if not self._curr: 4086 break 4087 4088 if self._match(TokenType.ON): 4089 action = None 4090 on = self._advance_any() and self._prev.text 4091 4092 if self._match_text_seq("NO", "ACTION"): 4093 action = "NO ACTION" 4094 elif self._match_text_seq("CASCADE"): 4095 action = "CASCADE" 4096 elif self._match_text_seq("RESTRICT"): 4097 action = "RESTRICT" 4098 elif self._match_pair(TokenType.SET, TokenType.NULL): 4099 action = "SET NULL" 4100 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4101 action = "SET DEFAULT" 4102 else: 4103 self.raise_error("Invalid key constraint") 4104 4105 options.append(f"ON {on} {action}") 4106 elif self._match_text_seq("NOT", "ENFORCED"): 4107 options.append("NOT ENFORCED") 4108 elif self._match_text_seq("DEFERRABLE"): 4109 options.append("DEFERRABLE") 4110 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4111 options.append("INITIALLY DEFERRED") 4112 elif self._match_text_seq("NORELY"): 4113 options.append("NORELY") 4114 elif self._match_text_seq("MATCH", "FULL"): 4115 options.append("MATCH FULL") 4116 else: 4117 break 4118 4119 return options 4120 4121 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4122 if match and not self._match(TokenType.REFERENCES): 4123 return None 4124 4125 expressions = None 4126 this = self._parse_table(schema=True) 4127 options = self._parse_key_constraint_options() 4128 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4129 4130 def _parse_foreign_key(self) -> 
exp.ForeignKey:
        """Parse FOREIGN KEY (columns) [REFERENCES ...] with optional ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-keyword action, e.g. CASCADE / RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list (dialects may override)."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint:
        """Parse PERIOD FOR SYSTEM_TIME (start_col, end_col)."""
        self._match(TokenType.TIMESTAMP_SNAPSHOT)

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, either as a column constraint or as a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # Without a following "(" (and outside properties) this is a column constraint.
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] / {...} after `this`: subscript, slice, array literal or DuckDB struct."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. x[:n].
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: shift indices by the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Chained brackets, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Turn `this` into a Slice when followed by a colon."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either the function form IF(...) or IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all; rewind past the IF token.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>), also accepting a comma as separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(<expr> [HAVING MAX|MIN <column>])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT <fmt>]).

        `strict` picks exp.Cast over exp.TryCast; a comma instead of AS yields
        exp.CastToStrType. A FORMAT clause on a temporal target becomes
        StrToDate/StrToTime with the format translated via the dialect mappings.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat it as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT arguments, honoring the dialect's NULL/strictness semantics."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT_WS arguments; the first argument is the delimiter."""
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG-style aggregates into exp.GroupConcat (continued below)."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; an unpaired trailing arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: also match when both sides are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a [KEY] <key> (:|,) [VALUE] <value> pair, as in JSON_OBJECT arguments."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.FormatJson when it is followed by FORMAT JSON."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT arguments: key/value pairs or *, plus NULL/UNIQUE/RETURNING/ENCODING options."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside a JSON_TABLE COLUMNS clause."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a [COLUMNS] (<column defs>) schema for JSON_TABLE / NESTED."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(<doc>[, <path>] [error/empty handling] COLUMNS (...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG arguments, respecting the dialect's argument order and LN default."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL MATCH (cols) AGAINST ('query' [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(<doc>[, <path>]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause column: name, type, optional path, optional AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments, in either `needle IN haystack` or csv form."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse ML.PREDICT-style arguments: MODEL <model>, TABLE <table>[, <params struct>]."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list into exp.JoinHint under the upper-cased hint name."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string) puts the pattern first; swap to normalize.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause's comma-separated named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `<name> AS (<window spec>)` definition."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if either modifier follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse trailing window syntax on `this`: FILTER (...), WITHIN GROUP (...),
        IGNORE/RESPECT NULLS and the OVER clause; alias=True parses a named window."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> (no parenthesized spec).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus PRECEDING/FOLLOWING side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a trailing alias for `this`; explicit=True requires the AS keyword."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multiple aliases, e.g. t AS (a, b).
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name, falling back to any token or the ID_VAR_TOKENS set."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, or a placeholder if none is present."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal as a quoted identifier, or None if no string follows."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a number literal, or a placeholder if none is present."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, or a placeholder if none is present."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any token / one of `tokens`) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Advance past the current token unless it is a reserved keyword; return it."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a var, falling back to a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, or a placeholder if none is present."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE/FALSE, or a placeholder if neither is present."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a * token, or a placeholder if none is present."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference such as @name or ${name[:part]}."""
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token via PLACEHOLDER_PARSERS; rewind if the parser declines."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a star-modifier EXCEPT column list (wrapped or single column)."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a star-modifier REPLACE expression list (wrapped or single expression)."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list with `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments attached to the separator belong to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain: token type -> expression class via `expressions`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list; parens optional when `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside (...); when `optional`, the parens may be absent."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, or fall back to a (possibly aliased) expression with set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT (with set ops and modifiers) used inside DDL, e.g. CREATE TABLE AS."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION | WORK] [mode [, mode ...]]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            # A mode is a run of VAR tokens, e.g. ISOLATION LEVEL READ COMMITTED.
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT [AND [NO] CHAIN] or ROLLBACK [TO [SAVEPOINT] <name>]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <name or string>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER <column>]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if
drop and not isinstance(drop, exp.Command): 5083 drop.set("kind", drop.args.get("kind", "COLUMN")) 5084 return drop 5085 5086 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5087 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5088 return self.expression( 5089 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5090 ) 5091 5092 def _parse_add_constraint(self) -> exp.AddConstraint: 5093 this = None 5094 kind = self._prev.token_type 5095 5096 if kind == TokenType.CONSTRAINT: 5097 this = self._parse_id_var() 5098 5099 if self._match_text_seq("CHECK"): 5100 expression = self._parse_wrapped(self._parse_conjunction) 5101 enforced = self._match_text_seq("ENFORCED") 5102 5103 return self.expression( 5104 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5105 ) 5106 5107 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5108 expression = self._parse_foreign_key() 5109 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5110 expression = self._parse_primary_key() 5111 else: 5112 expression = None 5113 5114 return self.expression(exp.AddConstraint, this=this, expression=expression) 5115 5116 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5117 index = self._index - 1 5118 5119 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5120 return self._parse_csv(self._parse_add_constraint) 5121 5122 self._retreat(index) 5123 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 5124 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5125 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5126 5127 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5128 self._match(TokenType.COLUMN) 5129 column = self._parse_field(any_token=True) 5130 5131 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5132 return self.expression(exp.AlterColumn, 
this=column, drop=True) 5133 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5134 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5135 5136 self._match_text_seq("SET", "DATA") 5137 return self.expression( 5138 exp.AlterColumn, 5139 this=column, 5140 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5141 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5142 using=self._match(TokenType.USING) and self._parse_conjunction(), 5143 ) 5144 5145 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5146 index = self._index - 1 5147 5148 partition_exists = self._parse_exists() 5149 if self._match(TokenType.PARTITION, advance=False): 5150 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5151 5152 self._retreat(index) 5153 return self._parse_csv(self._parse_drop_column) 5154 5155 def _parse_alter_table_rename(self) -> exp.RenameTable: 5156 self._match_text_seq("TO") 5157 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5158 5159 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5160 start = self._prev 5161 5162 if not self._match(TokenType.TABLE): 5163 return self._parse_as_command(start) 5164 5165 exists = self._parse_exists() 5166 only = self._match_text_seq("ONLY") 5167 this = self._parse_table(schema=True) 5168 5169 if self._next: 5170 self._advance() 5171 5172 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5173 if parser: 5174 actions = ensure_list(parser(self)) 5175 5176 if not self._curr: 5177 return self.expression( 5178 exp.AlterTable, 5179 this=this, 5180 exists=exists, 5181 actions=actions, 5182 only=only, 5183 ) 5184 5185 return self._parse_as_command(start) 5186 5187 def _parse_merge(self) -> exp.Merge: 5188 self._match(TokenType.INTO) 5189 target = self._parse_table() 5190 5191 if target and self._match(TokenType.ALIAS, advance=False): 5192 target.set("alias", 
self._parse_table_alias()) 5193 5194 self._match(TokenType.USING) 5195 using = self._parse_table() 5196 5197 self._match(TokenType.ON) 5198 on = self._parse_conjunction() 5199 5200 return self.expression( 5201 exp.Merge, 5202 this=target, 5203 using=using, 5204 on=on, 5205 expressions=self._parse_when_matched(), 5206 ) 5207 5208 def _parse_when_matched(self) -> t.List[exp.When]: 5209 whens = [] 5210 5211 while self._match(TokenType.WHEN): 5212 matched = not self._match(TokenType.NOT) 5213 self._match_text_seq("MATCHED") 5214 source = ( 5215 False 5216 if self._match_text_seq("BY", "TARGET") 5217 else self._match_text_seq("BY", "SOURCE") 5218 ) 5219 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5220 5221 self._match(TokenType.THEN) 5222 5223 if self._match(TokenType.INSERT): 5224 _this = self._parse_star() 5225 if _this: 5226 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5227 else: 5228 then = self.expression( 5229 exp.Insert, 5230 this=self._parse_value(), 5231 expression=self._match(TokenType.VALUES) and self._parse_value(), 5232 ) 5233 elif self._match(TokenType.UPDATE): 5234 expressions = self._parse_star() 5235 if expressions: 5236 then = self.expression(exp.Update, expressions=expressions) 5237 else: 5238 then = self.expression( 5239 exp.Update, 5240 expressions=self._match(TokenType.SET) 5241 and self._parse_csv(self._parse_equality), 5242 ) 5243 elif self._match(TokenType.DELETE): 5244 then = self.expression(exp.Var, this=self._prev.text) 5245 else: 5246 then = None 5247 5248 whens.append( 5249 self.expression( 5250 exp.When, 5251 matched=matched, 5252 source=source, 5253 condition=condition, 5254 then=then, 5255 ) 5256 ) 5257 return whens 5258 5259 def _parse_show(self) -> t.Optional[exp.Expression]: 5260 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5261 if parser: 5262 return parser(self) 5263 return self._parse_as_command(self._prev) 5264 5265 def _parse_set_item_assignment( 
5266 self, kind: t.Optional[str] = None 5267 ) -> t.Optional[exp.Expression]: 5268 index = self._index 5269 5270 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5271 return self._parse_set_transaction(global_=kind == "GLOBAL") 5272 5273 left = self._parse_primary() or self._parse_id_var() 5274 assignment_delimiter = self._match_texts(("=", "TO")) 5275 5276 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5277 self._retreat(index) 5278 return None 5279 5280 right = self._parse_statement() or self._parse_id_var() 5281 this = self.expression(exp.EQ, this=left, expression=right) 5282 5283 return self.expression(exp.SetItem, this=this, kind=kind) 5284 5285 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5286 self._match_text_seq("TRANSACTION") 5287 characteristics = self._parse_csv( 5288 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5289 ) 5290 return self.expression( 5291 exp.SetItem, 5292 expressions=characteristics, 5293 kind="TRANSACTION", 5294 **{"global": global_}, # type: ignore 5295 ) 5296 5297 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5298 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5299 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5300 5301 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5302 index = self._index 5303 set_ = self.expression( 5304 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5305 ) 5306 5307 if self._curr: 5308 self._retreat(index) 5309 return self._parse_as_command(self._prev) 5310 5311 return set_ 5312 5313 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5314 for option in options: 5315 if self._match_text_seq(*option.split(" ")): 5316 return exp.var(option) 5317 return None 5318 5319 def _parse_as_command(self, start: Token) -> exp.Command: 5320 while 
self._curr: 5321 self._advance() 5322 text = self._find_sql(start, self._prev) 5323 size = len(start.text) 5324 return exp.Command(this=text[:size], expression=text[size:]) 5325 5326 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5327 settings = [] 5328 5329 self._match_l_paren() 5330 kind = self._parse_id_var() 5331 5332 if self._match(TokenType.L_PAREN): 5333 while True: 5334 key = self._parse_id_var() 5335 value = self._parse_primary() 5336 5337 if not key and value is None: 5338 break 5339 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5340 self._match(TokenType.R_PAREN) 5341 5342 self._match_r_paren() 5343 5344 return self.expression( 5345 exp.DictProperty, 5346 this=this, 5347 kind=kind.this if kind else None, 5348 settings=settings, 5349 ) 5350 5351 def _parse_dict_range(self, this: str) -> exp.DictRange: 5352 self._match_l_paren() 5353 has_min = self._match_text_seq("MIN") 5354 if has_min: 5355 min = self._parse_var() or self._parse_primary() 5356 self._match_text_seq("MAX") 5357 max = self._parse_var() or self._parse_primary() 5358 else: 5359 max = self._parse_var() or self._parse_primary() 5360 min = exp.Literal.number(0) 5361 self._match_r_paren() 5362 return self.expression(exp.DictRange, this=this, min=min, max=max) 5363 5364 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5365 index = self._index 5366 expression = self._parse_column() 5367 if not self._match(TokenType.IN): 5368 self._retreat(index - 1) 5369 return None 5370 iterator = self._parse_column() 5371 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5372 return self.expression( 5373 exp.Comprehension, 5374 this=this, 5375 expression=expression, 5376 iterator=iterator, 5377 condition=condition, 5378 ) 5379 5380 def _find_parser( 5381 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5382 ) -> t.Optional[t.Callable]: 5383 if not self._curr: 5384 return None 5385 5386 index = 
self._index 5387 this = [] 5388 while True: 5389 # The current token might be multiple words 5390 curr = self._curr.text.upper() 5391 key = curr.split(" ") 5392 this.append(curr) 5393 5394 self._advance() 5395 result, trie = in_trie(trie, key) 5396 if result == TrieResult.FAILED: 5397 break 5398 5399 if result == TrieResult.EXISTS: 5400 subparser = parsers[" ".join(this)] 5401 return subparser 5402 5403 self._retreat(index) 5404 return None 5405 5406 def _match(self, token_type, advance=True, expression=None): 5407 if not self._curr: 5408 return None 5409 5410 if self._curr.token_type == token_type: 5411 if advance: 5412 self._advance() 5413 self._add_comments(expression) 5414 return True 5415 5416 return None 5417 5418 def _match_set(self, types, advance=True): 5419 if not self._curr: 5420 return None 5421 5422 if self._curr.token_type in types: 5423 if advance: 5424 self._advance() 5425 return True 5426 5427 return None 5428 5429 def _match_pair(self, token_type_a, token_type_b, advance=True): 5430 if not self._curr or not self._next: 5431 return None 5432 5433 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5434 if advance: 5435 self._advance(2) 5436 return True 5437 5438 return None 5439 5440 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5441 if not self._match(TokenType.L_PAREN, expression=expression): 5442 self.raise_error("Expecting (") 5443 5444 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5445 if not self._match(TokenType.R_PAREN, expression=expression): 5446 self.raise_error("Expecting )") 5447 5448 def _match_texts(self, texts, advance=True): 5449 if self._curr and self._curr.text.upper() in texts: 5450 if advance: 5451 self._advance() 5452 return True 5453 return False 5454 5455 def _match_text_seq(self, *texts, advance=True): 5456 index = self._index 5457 for text in texts: 5458 if self._curr and self._curr.text.upper() == text: 5459 
self._advance() 5460 else: 5461 self._retreat(index) 5462 return False 5463 5464 if not advance: 5465 self._retreat(index) 5466 5467 return True 5468 5469 @t.overload 5470 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5471 ... 5472 5473 @t.overload 5474 def _replace_columns_with_dots( 5475 self, this: t.Optional[exp.Expression] 5476 ) -> t.Optional[exp.Expression]: 5477 ... 5478 5479 def _replace_columns_with_dots(self, this): 5480 if isinstance(this, exp.Dot): 5481 exp.replace_children(this, self._replace_columns_with_dots) 5482 elif isinstance(this, exp.Column): 5483 exp.replace_children(this, self._replace_columns_with_dots) 5484 table = this.args.get("table") 5485 this = ( 5486 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5487 ) 5488 5489 return this 5490 5491 def _replace_lambda( 5492 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5493 ) -> t.Optional[exp.Expression]: 5494 if not node: 5495 return node 5496 5497 for column in node.find_all(exp.Column): 5498 if column.parts[0].name in lambda_variables: 5499 dot_or_id = column.to_dot() if column.table else column.this 5500 parent = column.parent 5501 5502 while isinstance(parent, exp.Dot): 5503 if not isinstance(parent.parent, exp.Dot): 5504 parent.replace(dot_or_id) 5505 break 5506 parent = parent.parent 5507 else: 5508 if column is node: 5509 node = dot_or_id 5510 else: 5511 column.replace(dot_or_id) 5512 return node 5513 5514 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5515 return [ 5516 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5517 for value in values 5518 if value 5519 ]
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from an alternating key/value argument list.

    A single star argument yields a ``StarMap``; otherwise the arguments are
    consumed pairwise as key, value, key, value, ...
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Even positions hold keys, odd positions their values. Indexing with
    # ``i + 1`` raises IndexError on an odd-length list, as before.
    keys = [args[i] for i in range(0, len(args), 2)]
    values = [args[i + 1] for i in range(0, len(args), 2)]

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 
TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMP_S, 165 TokenType.TIMESTAMP_MS, 166 TokenType.TIMESTAMP_NS, 167 TokenType.TIMESTAMPTZ, 168 TokenType.TIMESTAMPLTZ, 169 TokenType.DATETIME, 170 TokenType.DATETIME64, 171 TokenType.DATE, 172 TokenType.INT4RANGE, 173 TokenType.INT4MULTIRANGE, 174 TokenType.INT8RANGE, 175 TokenType.INT8MULTIRANGE, 176 TokenType.NUMRANGE, 177 TokenType.NUMMULTIRANGE, 178 TokenType.TSRANGE, 179 TokenType.TSMULTIRANGE, 180 TokenType.TSTZRANGE, 181 TokenType.TSTZMULTIRANGE, 182 TokenType.DATERANGE, 183 TokenType.DATEMULTIRANGE, 184 TokenType.DECIMAL, 185 TokenType.UDECIMAL, 186 TokenType.BIGDECIMAL, 187 TokenType.UUID, 188 TokenType.GEOGRAPHY, 189 TokenType.GEOMETRY, 190 TokenType.HLLSKETCH, 191 TokenType.HSTORE, 192 TokenType.PSEUDO_TYPE, 193 TokenType.SUPER, 194 TokenType.SERIAL, 195 TokenType.SMALLSERIAL, 196 TokenType.BIGSERIAL, 197 TokenType.XML, 198 TokenType.YEAR, 199 TokenType.UNIQUEIDENTIFIER, 200 TokenType.USERDEFINED, 201 TokenType.MONEY, 202 TokenType.SMALLMONEY, 203 TokenType.ROWVERSION, 204 TokenType.IMAGE, 205 TokenType.VARIANT, 206 TokenType.OBJECT, 207 TokenType.OBJECT_IDENTIFIER, 208 TokenType.INET, 209 TokenType.IPADDRESS, 210 
TokenType.IPPREFIX, 211 TokenType.UNKNOWN, 212 TokenType.NULL, 213 *ENUM_TYPE_TOKENS, 214 *NESTED_TYPE_TOKENS, 215 } 216 217 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 218 TokenType.BIGINT: TokenType.UBIGINT, 219 TokenType.INT: TokenType.UINT, 220 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 221 TokenType.SMALLINT: TokenType.USMALLINT, 222 TokenType.TINYINT: TokenType.UTINYINT, 223 TokenType.DECIMAL: TokenType.UDECIMAL, 224 } 225 226 SUBQUERY_PREDICATES = { 227 TokenType.ANY: exp.Any, 228 TokenType.ALL: exp.All, 229 TokenType.EXISTS: exp.Exists, 230 TokenType.SOME: exp.Any, 231 } 232 233 RESERVED_KEYWORDS = { 234 *Tokenizer.SINGLE_TOKENS.values(), 235 TokenType.SELECT, 236 } 237 238 DB_CREATABLES = { 239 TokenType.DATABASE, 240 TokenType.SCHEMA, 241 TokenType.TABLE, 242 TokenType.VIEW, 243 TokenType.MODEL, 244 TokenType.DICTIONARY, 245 } 246 247 CREATABLES = { 248 TokenType.COLUMN, 249 TokenType.CONSTRAINT, 250 TokenType.FUNCTION, 251 TokenType.INDEX, 252 TokenType.PROCEDURE, 253 *DB_CREATABLES, 254 } 255 256 # Tokens that can represent identifiers 257 ID_VAR_TOKENS = { 258 TokenType.VAR, 259 TokenType.ANTI, 260 TokenType.APPLY, 261 TokenType.ASC, 262 TokenType.AUTO_INCREMENT, 263 TokenType.BEGIN, 264 TokenType.CACHE, 265 TokenType.CASE, 266 TokenType.COLLATE, 267 TokenType.COMMAND, 268 TokenType.COMMENT, 269 TokenType.COMMIT, 270 TokenType.CONSTRAINT, 271 TokenType.DEFAULT, 272 TokenType.DELETE, 273 TokenType.DESC, 274 TokenType.DESCRIBE, 275 TokenType.DICTIONARY, 276 TokenType.DIV, 277 TokenType.END, 278 TokenType.EXECUTE, 279 TokenType.ESCAPE, 280 TokenType.FALSE, 281 TokenType.FIRST, 282 TokenType.FILTER, 283 TokenType.FORMAT, 284 TokenType.FULL, 285 TokenType.IS, 286 TokenType.ISNULL, 287 TokenType.INTERVAL, 288 TokenType.KEEP, 289 TokenType.KILL, 290 TokenType.LEFT, 291 TokenType.LOAD, 292 TokenType.MERGE, 293 TokenType.NATURAL, 294 TokenType.NEXT, 295 TokenType.OFFSET, 296 TokenType.ORDINALITY, 297 TokenType.OVERLAPS, 298 TokenType.OVERWRITE, 299 TokenType.PARTITION, 
300 TokenType.PERCENT, 301 TokenType.PIVOT, 302 TokenType.PRAGMA, 303 TokenType.RANGE, 304 TokenType.RECURSIVE, 305 TokenType.REFERENCES, 306 TokenType.REFRESH, 307 TokenType.RIGHT, 308 TokenType.ROW, 309 TokenType.ROWS, 310 TokenType.SEMI, 311 TokenType.SET, 312 TokenType.SETTINGS, 313 TokenType.SHOW, 314 TokenType.TEMPORARY, 315 TokenType.TOP, 316 TokenType.TRUE, 317 TokenType.UNIQUE, 318 TokenType.UNPIVOT, 319 TokenType.UPDATE, 320 TokenType.USE, 321 TokenType.VOLATILE, 322 TokenType.WINDOW, 323 *CREATABLES, 324 *SUBQUERY_PREDICATES, 325 *TYPE_TOKENS, 326 *NO_PAREN_FUNCTIONS, 327 } 328 329 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 330 331 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 332 TokenType.ANTI, 333 TokenType.APPLY, 334 TokenType.ASOF, 335 TokenType.FULL, 336 TokenType.LEFT, 337 TokenType.LOCK, 338 TokenType.NATURAL, 339 TokenType.OFFSET, 340 TokenType.RIGHT, 341 TokenType.SEMI, 342 TokenType.WINDOW, 343 } 344 345 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 346 347 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 348 349 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 350 351 FUNC_TOKENS = { 352 TokenType.COLLATE, 353 TokenType.COMMAND, 354 TokenType.CURRENT_DATE, 355 TokenType.CURRENT_DATETIME, 356 TokenType.CURRENT_TIMESTAMP, 357 TokenType.CURRENT_TIME, 358 TokenType.CURRENT_USER, 359 TokenType.FILTER, 360 TokenType.FIRST, 361 TokenType.FORMAT, 362 TokenType.GLOB, 363 TokenType.IDENTIFIER, 364 TokenType.INDEX, 365 TokenType.ISNULL, 366 TokenType.ILIKE, 367 TokenType.INSERT, 368 TokenType.LIKE, 369 TokenType.MERGE, 370 TokenType.OFFSET, 371 TokenType.PRIMARY_KEY, 372 TokenType.RANGE, 373 TokenType.REPLACE, 374 TokenType.RLIKE, 375 TokenType.ROW, 376 TokenType.UNNEST, 377 TokenType.VAR, 378 TokenType.LEFT, 379 TokenType.RIGHT, 380 TokenType.DATE, 381 TokenType.DATETIME, 382 TokenType.TABLE, 383 TokenType.TIMESTAMP, 384 TokenType.TIMESTAMPTZ, 385 TokenType.WINDOW, 386 TokenType.XOR, 387 *TYPE_TOKENS, 388 *SUBQUERY_PREDICATES, 
389 } 390 391 CONJUNCTION = { 392 TokenType.AND: exp.And, 393 TokenType.OR: exp.Or, 394 } 395 396 EQUALITY = { 397 TokenType.COLON_EQ: exp.PropertyEQ, 398 TokenType.EQ: exp.EQ, 399 TokenType.NEQ: exp.NEQ, 400 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 401 } 402 403 COMPARISON = { 404 TokenType.GT: exp.GT, 405 TokenType.GTE: exp.GTE, 406 TokenType.LT: exp.LT, 407 TokenType.LTE: exp.LTE, 408 } 409 410 BITWISE = { 411 TokenType.AMP: exp.BitwiseAnd, 412 TokenType.CARET: exp.BitwiseXor, 413 TokenType.PIPE: exp.BitwiseOr, 414 TokenType.DPIPE: exp.DPipe, 415 } 416 417 TERM = { 418 TokenType.DASH: exp.Sub, 419 TokenType.PLUS: exp.Add, 420 TokenType.MOD: exp.Mod, 421 TokenType.COLLATE: exp.Collate, 422 } 423 424 FACTOR = { 425 TokenType.DIV: exp.IntDiv, 426 TokenType.LR_ARROW: exp.Distance, 427 TokenType.SLASH: exp.Div, 428 TokenType.STAR: exp.Mul, 429 } 430 431 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 432 433 TIMES = { 434 TokenType.TIME, 435 TokenType.TIMETZ, 436 } 437 438 TIMESTAMPS = { 439 TokenType.TIMESTAMP, 440 TokenType.TIMESTAMPTZ, 441 TokenType.TIMESTAMPLTZ, 442 *TIMES, 443 } 444 445 SET_OPERATIONS = { 446 TokenType.UNION, 447 TokenType.INTERSECT, 448 TokenType.EXCEPT, 449 } 450 451 JOIN_METHODS = { 452 TokenType.NATURAL, 453 TokenType.ASOF, 454 } 455 456 JOIN_SIDES = { 457 TokenType.LEFT, 458 TokenType.RIGHT, 459 TokenType.FULL, 460 } 461 462 JOIN_KINDS = { 463 TokenType.INNER, 464 TokenType.OUTER, 465 TokenType.CROSS, 466 TokenType.SEMI, 467 TokenType.ANTI, 468 } 469 470 JOIN_HINTS: t.Set[str] = set() 471 472 LAMBDAS = { 473 TokenType.ARROW: lambda self, expressions: self.expression( 474 exp.Lambda, 475 this=self._replace_lambda( 476 self._parse_conjunction(), 477 {node.name for node in expressions}, 478 ), 479 expressions=expressions, 480 ), 481 TokenType.FARROW: lambda self, expressions: self.expression( 482 exp.Kwarg, 483 this=exp.var(expressions[0].name), 484 expression=self._parse_conjunction(), 485 ), 486 } 487 488 COLUMN_OPERATORS = { 489 
TokenType.DOT: None, 490 TokenType.DCOLON: lambda self, this, to: self.expression( 491 exp.Cast if self.STRICT_CAST else exp.TryCast, 492 this=this, 493 to=to, 494 ), 495 TokenType.ARROW: lambda self, this, path: self.expression( 496 exp.JSONExtract, 497 this=this, 498 expression=path, 499 ), 500 TokenType.DARROW: lambda self, this, path: self.expression( 501 exp.JSONExtractScalar, 502 this=this, 503 expression=path, 504 ), 505 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 506 exp.JSONBExtract, 507 this=this, 508 expression=path, 509 ), 510 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 511 exp.JSONBExtractScalar, 512 this=this, 513 expression=path, 514 ), 515 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 516 exp.JSONBContains, 517 this=this, 518 expression=key, 519 ), 520 } 521 522 EXPRESSION_PARSERS = { 523 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 524 exp.Column: lambda self: self._parse_column(), 525 exp.Condition: lambda self: self._parse_conjunction(), 526 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 527 exp.Expression: lambda self: self._parse_statement(), 528 exp.From: lambda self: self._parse_from(), 529 exp.Group: lambda self: self._parse_group(), 530 exp.Having: lambda self: self._parse_having(), 531 exp.Identifier: lambda self: self._parse_id_var(), 532 exp.Join: lambda self: self._parse_join(), 533 exp.Lambda: lambda self: self._parse_lambda(), 534 exp.Lateral: lambda self: self._parse_lateral(), 535 exp.Limit: lambda self: self._parse_limit(), 536 exp.Offset: lambda self: self._parse_offset(), 537 exp.Order: lambda self: self._parse_order(), 538 exp.Ordered: lambda self: self._parse_ordered(), 539 exp.Properties: lambda self: self._parse_properties(), 540 exp.Qualify: lambda self: self._parse_qualify(), 541 exp.Returning: lambda self: self._parse_returning(), 542 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 543 
exp.Table: lambda self: self._parse_table_parts(), 544 exp.TableAlias: lambda self: self._parse_table_alias(), 545 exp.Where: lambda self: self._parse_where(), 546 exp.Window: lambda self: self._parse_named_window(), 547 exp.With: lambda self: self._parse_with(), 548 "JOIN_TYPE": lambda self: self._parse_join_parts(), 549 } 550 551 STATEMENT_PARSERS = { 552 TokenType.ALTER: lambda self: self._parse_alter(), 553 TokenType.BEGIN: lambda self: self._parse_transaction(), 554 TokenType.CACHE: lambda self: self._parse_cache(), 555 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 556 TokenType.COMMENT: lambda self: self._parse_comment(), 557 TokenType.CREATE: lambda self: self._parse_create(), 558 TokenType.DELETE: lambda self: self._parse_delete(), 559 TokenType.DESC: lambda self: self._parse_describe(), 560 TokenType.DESCRIBE: lambda self: self._parse_describe(), 561 TokenType.DROP: lambda self: self._parse_drop(), 562 TokenType.INSERT: lambda self: self._parse_insert(), 563 TokenType.KILL: lambda self: self._parse_kill(), 564 TokenType.LOAD: lambda self: self._parse_load(), 565 TokenType.MERGE: lambda self: self._parse_merge(), 566 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 567 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 568 TokenType.REFRESH: lambda self: self._parse_refresh(), 569 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 570 TokenType.SET: lambda self: self._parse_set(), 571 TokenType.UNCACHE: lambda self: self._parse_uncache(), 572 TokenType.UPDATE: lambda self: self._parse_update(), 573 TokenType.USE: lambda self: self.expression( 574 exp.Use, 575 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 576 and exp.var(self._prev.text), 577 this=self._parse_table(schema=False), 578 ), 579 } 580 581 UNARY_PARSERS = { 582 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 583 TokenType.NOT: lambda self: 
self.expression(exp.Not, this=self._parse_equality()), 584 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 585 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 586 } 587 588 PRIMARY_PARSERS = { 589 TokenType.STRING: lambda self, token: self.expression( 590 exp.Literal, this=token.text, is_string=True 591 ), 592 TokenType.NUMBER: lambda self, token: self.expression( 593 exp.Literal, this=token.text, is_string=False 594 ), 595 TokenType.STAR: lambda self, _: self.expression( 596 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 597 ), 598 TokenType.NULL: lambda self, _: self.expression(exp.Null), 599 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 600 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 601 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 602 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 603 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 604 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 605 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 606 exp.National, this=token.text 607 ), 608 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 609 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 610 exp.RawString, this=token.text 611 ), 612 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 613 } 614 615 PLACEHOLDER_PARSERS = { 616 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 617 TokenType.PARAMETER: lambda self: self._parse_parameter(), 618 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 619 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 620 else None, 621 } 622 623 
    # Maps a token type to a callable(self, this) that parses a range-style
    # predicate (BETWEEN, IN, LIKE, IS, ...) with `this` as its left-hand side.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # Maps an upper-cased keyword (sequence) to a callable(self, **kwargs) that
    # parses the corresponding DDL property, e.g. in CREATE TABLE ... <PROPERTY>.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Maps an upper-cased keyword to a callable(self) that parses the
    # corresponding column constraint (NOT NULL, DEFAULT, PRIMARY KEY, ...).
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <fn> when followed by UPDATE, otherwise a bare ON <id> property
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # Maps the keyword following ALTER TABLE <name> to its sub-parser.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    # Function-like constructs parsed without a parenthesized argument list.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument lists need dedicated parsing (special syntax
    # such as CAST(x AS type), EXTRACT(part FROM expr), TRIM(... FROM ...)).
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Maps a clause-introducing token to a callable(self) returning an
    # (arg_name, parsed_clause) pair used to populate query modifiers.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialects register SHOW <...> parsers here (see _Parser,
    # which builds SHOW_TRIE from the keys).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # Passed to _parse_cast by the CAST/CONVERT entries in FUNCTION_PARSERS
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether LOG(b, x) takes the base as its first argument, and whether a
    # single-argument LOG defaults to the natural logarithm
    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether the behavior of a / b depends on the types of a and b.
    # False means a / b is always float division.
    # True means a / b is integer division if both a and b are integers.
    TYPED_DIVISION = False

    # False means 1 / 0 throws an error.
    # True means 1 / 0 returns null.
    SAFE_DIVISION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled (overridden per dialect; defaults below are dialect-agnostic)
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Resets all parsing state so this instance can be reused for a new SQL string."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Splits the token stream on top-level semicolons and applies `parse_method`
        to each resulting statement chunk, collecting one tree per statement.
        """
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't start a new (empty) statement
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # If no explicit comments were given, attach any pending token comments
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Moves the comments buffered from the previous token onto `expression`
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL spanning the two tokens, inclusive
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor forward, refreshing the _curr/_next/_prev token views
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Moves the cursor back to `index` (implemented as a negative advance)
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Falls back to a generic Command wrapping the raw statement text."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT [IF EXISTS] ON <kind> <target> IS <string>."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each TTL item is an expression optionally followed by an action
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level entry point: dispatches on the current token to a statement parser."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses DROP [TEMPORARY|MATERIALIZED] <kind> [IF EXISTS] <name> [CASCADE|...]."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown DROP target: keep it as an opaque command
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Consumes IF [NOT] EXISTS, returning a truthy value when present."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses CREATE [OR REPLACE] [UNIQUE] <kind> ... in all its dialect variants."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The matched parser doesn't accept the modifier kwargs we collected
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single DDL property, via PROPERTY_PARSERS or as a generic key=value."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1479 1480 index = self._index 1481 key = self._parse_column() 1482 1483 if not self._match(TokenType.EQ): 1484 self._retreat(index) 1485 return None 1486 1487 return self.expression( 1488 exp.Property, 1489 this=key.to_dot() if isinstance(key, exp.Column) else key, 1490 value=self._parse_column() or self._parse_var(any_token=True), 1491 ) 1492 1493 def _parse_stored(self) -> exp.FileFormatProperty: 1494 self._match(TokenType.ALIAS) 1495 1496 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1497 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1498 1499 return self.expression( 1500 exp.FileFormatProperty, 1501 this=self.expression( 1502 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1503 ) 1504 if input_format or output_format 1505 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1506 ) 1507 1508 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1509 self._match(TokenType.EQ) 1510 self._match(TokenType.ALIAS) 1511 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1512 1513 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1514 properties = [] 1515 while True: 1516 if before: 1517 prop = self._parse_property_before() 1518 else: 1519 prop = self._parse_property() 1520 1521 if not prop: 1522 break 1523 for p in ensure_list(prop): 1524 properties.append(p) 1525 1526 if properties: 1527 return self.expression(exp.Properties, expressions=properties) 1528 1529 return None 1530 1531 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1532 return self.expression( 1533 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1534 ) 1535 1536 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1537 if self._index >= 2: 1538 pre_volatile_token = self._tokens[self._index - 2] 1539 
else: 1540 pre_volatile_token = None 1541 1542 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1543 return exp.VolatileProperty() 1544 1545 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1546 1547 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1548 self._match_pair(TokenType.EQ, TokenType.ON) 1549 1550 prop = self.expression(exp.WithSystemVersioningProperty) 1551 if self._match(TokenType.L_PAREN): 1552 self._match_text_seq("HISTORY_TABLE", "=") 1553 prop.set("this", self._parse_table_parts()) 1554 1555 if self._match(TokenType.COMMA): 1556 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1557 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1558 1559 self._match_r_paren() 1560 1561 return prop 1562 1563 def _parse_with_property( 1564 self, 1565 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1566 if self._match(TokenType.L_PAREN, advance=False): 1567 return self._parse_wrapped_csv(self._parse_property) 1568 1569 if self._match_text_seq("JOURNAL"): 1570 return self._parse_withjournaltable() 1571 1572 if self._match_text_seq("DATA"): 1573 return self._parse_withdata(no=False) 1574 elif self._match_text_seq("NO", "DATA"): 1575 return self._parse_withdata(no=True) 1576 1577 if not self._next: 1578 return None 1579 1580 return self._parse_withisolatedloading() 1581 1582 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1583 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1584 self._match(TokenType.EQ) 1585 1586 user = self._parse_id_var() 1587 self._match(TokenType.PARAMETER) 1588 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1589 1590 if not user or not host: 1591 return None 1592 1593 return exp.DefinerProperty(this=f"{user}@{host}") 1594 1595 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1596 self._match(TokenType.TABLE) 1597 self._match(TokenType.EQ) 
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        # [NO] LOG
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        # Flags (no/dual/before/...) are collected by _parse_property_before.
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = ON | OFF [DEFAULT]."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (...) [SORTED BY (...)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # COPY must be followed by GRANTS; otherwise back out of the COPY match.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        # FREESPACE = <number> [PERCENT]
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO, either `= <n> [PERCENT]` or as a bare flag."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE = <n> [BYTES | KBYTES | KILOBYTES]."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP (...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: object kind, target, FOR/IN, lock type, OVERRIDE.
        NOTE(review): looks Teradata-specific — confirm against dialect usage."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a table reference; ROW does not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        # Returns an empty list when there is no PARTITION BY clause.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound spec: IN (...), FROM (...) TO (...),
        or WITH (MODULUS n, REMAINDER m). Raises on anything else."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are unbounded range markers, not expressions.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <table> { DEFAULT | FOR VALUES <bound-spec> }."""
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no,
                               statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        # Only NO PRIMARY INDEX is recognized here.
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse ON COMMIT {PRESERVE|DELETE} ROWS, or a generic ON <schema> property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [{INCLUDING|EXCLUDING} <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: a plain type, TABLE<...>, or TABLE (schema)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE< ... > struct-style column list.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement, including INSERT ... DIRECTORY and
        INSERT OR <alternative> forms."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # e.g. INSERT OR REPLACE INTO ...
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE,
                                   TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        # KILL [CONNECTION | QUERY] <id>
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... or ON DUPLICATE KEY ... after an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a key expression list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE '<class>' [WITH SERDEPROPERTIES (...)] or
        ROW FORMAT DELIMITED with its optional terminator clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            # ESCAPED BY is only meaningful after FIELDS TERMINATED BY.
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA ...; anything else after LOAD falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and
                self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement with its SET/FROM/WHERE/RETURNING clauses."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse a single row of a VALUES clause into a Tuple."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement, a parenthesized query, or a VALUES clause,
        including a leading WITH (CTE) clause."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
this = None 2273 2274 return self._parse_set_operations(this) 2275 2276 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2277 if not skip_with_token and not self._match(TokenType.WITH): 2278 return None 2279 2280 comments = self._prev_comments 2281 recursive = self._match(TokenType.RECURSIVE) 2282 2283 expressions = [] 2284 while True: 2285 expressions.append(self._parse_cte()) 2286 2287 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2288 break 2289 else: 2290 self._match(TokenType.WITH) 2291 2292 return self.expression( 2293 exp.With, comments=comments, expressions=expressions, recursive=recursive 2294 ) 2295 2296 def _parse_cte(self) -> exp.CTE: 2297 alias = self._parse_table_alias() 2298 if not alias or not alias.this: 2299 self.raise_error("Expected CTE to have alias") 2300 2301 self._match(TokenType.ALIAS) 2302 return self.expression( 2303 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2304 ) 2305 2306 def _parse_table_alias( 2307 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2308 ) -> t.Optional[exp.TableAlias]: 2309 any_token = self._match(TokenType.ALIAS) 2310 alias = ( 2311 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2312 or self._parse_string_as_identifier() 2313 ) 2314 2315 index = self._index 2316 if self._match(TokenType.L_PAREN): 2317 columns = self._parse_csv(self._parse_function_parameter) 2318 self._match_r_paren() if columns else self._retreat(index) 2319 else: 2320 columns = None 2321 2322 if not alias and not columns: 2323 return None 2324 2325 return self.expression(exp.TableAlias, this=alias, columns=columns) 2326 2327 def _parse_subquery( 2328 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2329 ) -> t.Optional[exp.Subquery]: 2330 if not this: 2331 return None 2332 2333 return self.expression( 2334 exp.Subquery, 2335 this=this, 2336 pivots=self._parse_pivots(), 2337 
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and other trailing clauses to a query node."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT clause may also carry an OFFSET; hoist it
                            # into its own node on the query.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse a `/*+ ... */`-style optimizer hint following SELECT."""
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE clause (PARTITION BY, ORDER BY, MEASURES,
        rows-per-match, AFTER MATCH SKIP, PATTERN, DEFINE, alias)."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan raw tokens until the parentheses balance; the SQL text
            # between them becomes the pattern verbatim.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL [VIEW] ... or {OUTER|CROSS} APPLY ..."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: a lateral over an UNNEST, a (possibly dotted)
            # function call, or a bare identifier.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer,
                               alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        # Returns the (method, side, kind) tokens of a join; each may be None.
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a JOIN clause, including comma joins and OUTER/CROSS APPLY."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword followed — back out of the parts we consumed.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY behaves like a LEFT join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Try a nested join whose ON/USING belongs to this outer join.
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
2589 join = None 2590 self._retreat(index) 2591 2592 kwargs["this"].set("joins", [join] if join else None) 2593 2594 comments = [c for token in (method, side, kind) if token for c in token.comments] 2595 return self.expression(exp.Join, comments=comments, **kwargs) 2596 2597 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2598 this = self._parse_conjunction() 2599 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2600 return this 2601 2602 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2603 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2604 2605 return this 2606 2607 def _parse_index( 2608 self, 2609 index: t.Optional[exp.Expression] = None, 2610 ) -> t.Optional[exp.Index]: 2611 if index: 2612 unique = None 2613 primary = None 2614 amp = None 2615 2616 self._match(TokenType.ON) 2617 self._match(TokenType.TABLE) # hive 2618 table = self._parse_table_parts(schema=True) 2619 else: 2620 unique = self._match(TokenType.UNIQUE) 2621 primary = self._match_text_seq("PRIMARY") 2622 amp = self._match_text_seq("AMP") 2623 2624 if not self._match(TokenType.INDEX): 2625 return None 2626 2627 index = self._parse_id_var() 2628 table = None 2629 2630 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2631 2632 if self._match(TokenType.L_PAREN, advance=False): 2633 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2634 else: 2635 columns = None 2636 2637 return self.expression( 2638 exp.Index, 2639 this=index, 2640 table=table, 2641 using=using, 2642 columns=columns, 2643 unique=unique, 2644 primary=primary, 2645 amp=amp, 2646 partition_by=self._parse_partition_by(), 2647 where=self._parse_where(), 2648 ) 2649 2650 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2651 hints: t.List[exp.Expression] = [] 2652 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2653 # 
https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2654 hints.append( 2655 self.expression( 2656 exp.WithTableHint, 2657 expressions=self._parse_csv( 2658 lambda: self._parse_function() or self._parse_var(any_token=True) 2659 ), 2660 ) 2661 ) 2662 self._match_r_paren() 2663 else: 2664 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2665 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2666 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2667 2668 self._match_texts(("INDEX", "KEY")) 2669 if self._match(TokenType.FOR): 2670 hint.set("target", self._advance_any() and self._prev.text.upper()) 2671 2672 hint.set("expressions", self._parse_wrapped_id_vars()) 2673 hints.append(hint) 2674 2675 return hints or None 2676 2677 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2678 return ( 2679 (not schema and self._parse_function(optional_parens=False)) 2680 or self._parse_id_var(any_token=False) 2681 or self._parse_string_as_identifier() 2682 or self._parse_placeholder() 2683 ) 2684 2685 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2686 catalog = None 2687 db = None 2688 table = self._parse_table_part(schema=schema) 2689 2690 while self._match(TokenType.DOT): 2691 if catalog: 2692 # This allows nesting the table in arbitrarily many dot expressions if needed 2693 table = self.expression( 2694 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2695 ) 2696 else: 2697 catalog = db 2698 db = table 2699 table = self._parse_table_part(schema=schema) 2700 2701 if not table: 2702 self.raise_error(f"Expected table name but got {self._curr}") 2703 2704 return self.expression( 2705 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2706 ) 2707 2708 def _parse_table( 2709 self, 2710 schema: bool = False, 2711 joins: bool = False, 2712 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2713 parse_bracket: bool = False, 
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: a LATERAL, UNNEST, VALUES, subquery, or a
        plain table reference with its optional version, alias, hints, pivots
        and TABLESAMPLE clauses."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place the alias before TABLESAMPLE, others after;
        # ALIAS_POST_TABLESAMPLE selects which order is consumed here.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node it applies to.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal/versioned-table clause (FOR SYSTEM_TIME-style):
        FROM..TO, BETWEEN..AND, CONTAINED IN, ALL, or AS OF."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse an UNNEST(...) table factor, including its optional alias,
        column aliases and WITH ORDINALITY / WITH OFFSET clauses."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.UNNEST_COLUMN_ONLY:
                # In these dialects the alias names the column, not the table.
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The last column alias names the ordinality column.
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2841 if not is_derived and not self._match(TokenType.VALUES): 2842 return None 2843 2844 expressions = self._parse_csv(self._parse_value) 2845 alias = self._parse_table_alias() 2846 2847 if is_derived: 2848 self._match_r_paren() 2849 2850 return self.expression( 2851 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2852 ) 2853 2854 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2855 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2856 as_modifier and self._match_text_seq("USING", "SAMPLE") 2857 ): 2858 return None 2859 2860 bucket_numerator = None 2861 bucket_denominator = None 2862 bucket_field = None 2863 percent = None 2864 rows = None 2865 size = None 2866 seed = None 2867 2868 kind = ( 2869 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2870 ) 2871 method = self._parse_var(tokens=(TokenType.ROW,)) 2872 2873 matched_l_paren = self._match(TokenType.L_PAREN) 2874 2875 if self.TABLESAMPLE_CSV: 2876 num = None 2877 expressions = self._parse_csv(self._parse_primary) 2878 else: 2879 expressions = None 2880 num = ( 2881 self._parse_factor() 2882 if self._match(TokenType.NUMBER, advance=False) 2883 else self._parse_primary() 2884 ) 2885 2886 if self._match_text_seq("BUCKET"): 2887 bucket_numerator = self._parse_number() 2888 self._match_text_seq("OUT", "OF") 2889 bucket_denominator = bucket_denominator = self._parse_number() 2890 self._match(TokenType.ON) 2891 bucket_field = self._parse_field() 2892 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2893 percent = num 2894 elif self._match(TokenType.ROWS): 2895 rows = num 2896 elif num: 2897 size = num 2898 2899 if matched_l_paren: 2900 self._match_r_paren() 2901 2902 if self._match(TokenType.L_PAREN): 2903 method = self._parse_var() 2904 seed = self._match(TokenType.COMMA) and self._parse_number() 2905 self._match_r_paren() 2906 elif 
self._match_texts(("SEED", "REPEATABLE")): 2907 seed = self._parse_wrapped(self._parse_number) 2908 2909 return self.expression( 2910 exp.TableSample, 2911 expressions=expressions, 2912 method=method, 2913 bucket_numerator=bucket_numerator, 2914 bucket_denominator=bucket_denominator, 2915 bucket_field=bucket_field, 2916 percent=percent, 2917 rows=rows, 2918 size=size, 2919 seed=seed, 2920 kind=kind, 2921 ) 2922 2923 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2924 return list(iter(self._parse_pivot, None)) or None 2925 2926 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2927 return list(iter(self._parse_join, None)) or None 2928 2929 # https://duckdb.org/docs/sql/statements/pivot 2930 def _parse_simplified_pivot(self) -> exp.Pivot: 2931 def _parse_on() -> t.Optional[exp.Expression]: 2932 this = self._parse_bitwise() 2933 return self._parse_in(this) if self._match(TokenType.IN) else this 2934 2935 this = self._parse_table() 2936 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2937 using = self._match(TokenType.USING) and self._parse_csv( 2938 lambda: self._parse_alias(self._parse_function()) 2939 ) 2940 group = self._parse_group() 2941 return self.expression( 2942 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2943 ) 2944 2945 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2946 index = self._index 2947 include_nulls = None 2948 2949 if self._match(TokenType.PIVOT): 2950 unpivot = False 2951 elif self._match(TokenType.UNPIVOT): 2952 unpivot = True 2953 2954 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2955 if self._match_text_seq("INCLUDE", "NULLS"): 2956 include_nulls = True 2957 elif self._match_text_seq("EXCLUDE", "NULLS"): 2958 include_nulls = False 2959 else: 2960 return None 2961 2962 expressions = [] 2963 field = None 2964 2965 if not self._match(TokenType.L_PAREN): 2966 self._retreat(index) 2967 return None 2968 2969 if unpivot: 2970 
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names produced by the pivot, one
            # per (aggregation, IN-value) pair.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation; dialects may override."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; returns None when absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including GROUPING SETS, [WITH] ROLLUP,
        [WITH] CUBE, WITH TOTALS and GROUP BY ALL."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        # Keep consuming expression lists and ROLLUP/CUBE/TOTALS modifiers
        # until neither appears anymore.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # WITH belonged to something else (e.g. a WITH hint/CTE).
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...); returns None when absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse a single grouping set: either a parenthesized column tuple
        or a bare column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; returns None when absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())
    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause; returns None when absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY (in either order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # PRIOR is only a function within CONNECT BY, so register it
        # temporarily while the connect condition is parsed.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause; returns ``this`` unchanged when absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a SORT/CLUSTER/DISTRIBUTE BY-style clause introduced by
        ``token`` into ``exp_class``; returns None when absent."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        """Parse one ordering term: expression [ASC|DESC] [NULLS FIRST|LAST],
        applying the dialect's implicit null-ordering when unspecified."""
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        # NOTE(review): "(asc and False)" is always False, so desc is purely
        # whether DESC matched; the ASC match above only consumes the token.
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT [offset,] n (or TOP [(]n[)] when ``top``), or a FETCH
        {FIRST|NEXT} clause; returns ``this`` unchanged when absent."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style "LIMIT offset, count".
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns ``this`` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse any number of row-locking clauses: FOR UPDATE, FOR SHARE or
        LOCK IN SHARE MODE, each with optional OF <tables> and wait policy."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, an expression = WAIT <n>, False = SKIP LOCKED.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains following ``this``; returns
        ``this`` unchanged when no set operator is present."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            comments=self._prev.comments,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-connected boolean expressions."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators (<, >, <=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ...), the
        Postgres ISNULL/NOTNULL shorthands and trailing IS tests."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the remainder of an IS test: [NOT] DISTINCT FROM, or
        [NOT] NULL/TRUE/FALSE; rewinds and returns None on no match."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of an IN predicate: an UNNEST, a
        parenthesized list/subquery, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                # A single subquery inside the parens is IN (SELECT ...).
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse the "low AND high" tail of a BETWEEN predicate."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (for LIKE and friends)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing it toward the canonical
        INTERVAL '<number>' <unit> form; rewinds and returns None when the
        INTERVAL keyword is not followed by a value."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, the ?? coalesce operator and << / >>
        shifts (tokenized as two LT/GT tokens)."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators, tagging divisions with the
        dialect's typed/safe division semantics."""
        if self.EXPONENT:
            factor = self._parse_tokens(self._parse_exponent, self.FACTOR)
        else:
            factor = self._parse_tokens(self._parse_unary, self.FACTOR)
        if isinstance(factor, exp.Div):
            factor.args["typed"] = self.TYPED_DIVISION
            factor.args["safe"] = self.SAFE_DIVISION
        return factor

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation operators."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, then AT TIME ZONE over a typed term."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse either an INTERVAL, a "TYPE literal" cast form (e.g.
        DATE '2020-01-01'), or fall back to a column expression."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # Dialect-specific literal forms (e.g. DATE '...') may have a
                # dedicated parser; otherwise represent as a CAST.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with no args was a misparse - rewind.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one type parameter, e.g. the "10" in VARCHAR(10)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested/struct/enum parameters, angle
        brackets, time zone modifiers, INTERVAL spans, UNSIGNED and trailing
        [] array suffixes. Rewinds and returns None when no type is found."""
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Not a known type token; possibly an identifier naming a type
            # (retokenized to check) or a user-defined type.
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(...) could also be a function call - decided below.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No string literal follows, so TYPE(...) was a function call.
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAYs, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: "name type" or "name: type"."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
def _parse_column(self) -> t.Optional[exp.Expression]:
    """Parse a column reference, then any trailing column operators (dots, casts, brackets)."""
    this = self._parse_field()
    if isinstance(this, exp.Identifier):
        # A bare identifier in column position becomes a Column node.
        this = self.expression(exp.Column, this=this)
    elif not this:
        # Nothing parsed; still allow a standalone bracket construct (e.g. an array literal).
        return self._parse_bracket(this)
    return self._parse_column_ops(this)

def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Apply postfix column operators (``::``, ``.``, dialect-specific ops) to `this`."""
    this = self._parse_bracket(this)

    while self._match_set(self.COLUMN_OPERATORS):
        op_token = self._prev.token_type
        op = self.COLUMN_OPERATORS.get(op_token)

        if op_token == TokenType.DCOLON:
            # ``expr::type`` cast syntax - the "field" is a type, not a name.
            field = self._parse_types()
            if not field:
                self.raise_error("Expected type")
        elif op and self._curr:
            # Dialect-specific operator: consume the next token as a literal operand.
            self._advance()
            value = self._prev.text
            field = (
                exp.Literal.number(value)
                if self._prev.token_type == TokenType.NUMBER
                else exp.Literal.string(value)
            )
        else:
            field = self._parse_field(anonymous_func=True, any_token=True)

        if isinstance(field, exp.Func):
            # bigquery allows function calls like x.y.count(...)
            # SAFE.SUBSTR(...)
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            this = self._replace_columns_with_dots(this)

        if op:
            this = op(self, this, field)
        elif isinstance(this, exp.Column) and not this.args.get("catalog"):
            # Shift the parts of the existing column one qualifier up
            # (column -> table -> db -> catalog) and use `field` as the column.
            this = self.expression(
                exp.Column,
                this=field,
                table=this.this,
                db=this.args.get("table"),
                catalog=this.args.get("db"),
            )
        else:
            this = self.expression(exp.Dot, this=this, expression=field)
        this = self._parse_bracket(this)
    return this

def _parse_primary(self) -> t.Optional[exp.Expression]:
    """Parse a primary expression: literals, parenthesized expressions, subqueries, tuples."""
    if self._match_set(self.PRIMARY_PARSERS):
        token_type = self._prev.token_type
        primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

        if token_type == TokenType.STRING:
            # Adjacent string literals are implicitly concatenated ('a' 'b' -> 'ab').
            expressions = [primary]
            while self._match(TokenType.STRING):
                expressions.append(exp.Literal.string(self._prev.text))

            if len(expressions) > 1:
                return self.expression(exp.Concat, expressions=expressions)

        return primary

    if self._match_pair(TokenType.DOT, TokenType.NUMBER):
        # Leading-dot decimal, e.g. ``.5``.
        return exp.Literal.number(f"0.{self._prev.text}")

    if self._match(TokenType.L_PAREN):
        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        if isinstance(this, exp.Subqueryable):
            this = self._parse_set_operations(
                self._parse_subquery(this=this, parse_alias=False)
            )
        elif len(expressions) > 1:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=self._parse_set_operations(this))

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    return None
def _parse_field(
    self,
    any_token: bool = False,
    tokens: t.Optional[t.Collection[TokenType]] = None,
    anonymous_func: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a field: a primary expression, a function call, or an identifier.

    Args:
        any_token: whether any token may be treated as an identifier.
        tokens: additional token types accepted as identifiers.
        anonymous_func: whether unknown functions are parsed as exp.Anonymous.
    """
    return (
        self._parse_primary()
        or self._parse_function(anonymous=anonymous_func)
        or self._parse_id_var(any_token=any_token, tokens=tokens)
    )

def _parse_function(
    self,
    functions: t.Optional[t.Dict[str, t.Callable]] = None,
    anonymous: bool = False,
    optional_parens: bool = True,
) -> t.Optional[exp.Expression]:
    """Parse a function call, unwrapping the ODBC ``{fn ...}`` escape if present."""
    # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
    # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
    fn_syntax = False
    if (
        self._match(TokenType.L_BRACE, advance=False)
        and self._next
        and self._next.text.upper() == "FN"
    ):
        self._advance(2)
        fn_syntax = True

    func = self._parse_function_call(
        functions=functions, anonymous=anonymous, optional_parens=optional_parens
    )

    if fn_syntax:
        self._match(TokenType.R_BRACE)

    return func

def _parse_function_call(
    self,
    functions: t.Optional[t.Dict[str, t.Callable]] = None,
    anonymous: bool = False,
    optional_parens: bool = True,
) -> t.Optional[exp.Expression]:
    """Parse a function call, dispatching to registered dialect-specific parsers."""
    if not self._curr:
        return None

    token_type = self._curr.token_type
    this = self._curr.text
    upper = this.upper()

    # Functions that are parsed without parentheses via a custom parser.
    parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
    if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
        self._advance()
        return parser(self)

    if not self._next or self._next.token_type != TokenType.L_PAREN:
        # No parenthesis follows: only no-paren functions (e.g. CURRENT_DATE) qualify.
        if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
            self._advance()
            return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

        return None

    if token_type not in self.FUNC_TOKENS:
        return None

    # Skip the function name and the opening parenthesis.
    self._advance(2)

    parser = self.FUNCTION_PARSERS.get(upper)
    if parser and not anonymous:
        this = parser(self)
    else:
        subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

        if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
            # e.g. EXISTS(SELECT ...).
            this = self.expression(subquery_predicate, this=self._parse_select())
            self._match_r_paren()
            return this

        if functions is None:
            functions = self.FUNCTIONS

        function = functions.get(upper)

        alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
        args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

        if function and not anonymous:
            func = self.validate_expression(function(args), args)
            if not self.NORMALIZE_FUNCTIONS:
                # Preserve the original casing of the function name.
                func.meta["name"] = this
            this = func
        else:
            this = self.expression(exp.Anonymous, this=this, expressions=args)

    self._match_r_paren(this)
    return self._parse_window(this)

def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
    """Parse one parameter of a user-defined function definition."""
    return self._parse_column_def(self._parse_id_var())

def _parse_user_defined_function(
    self, kind: t.Optional[TokenType] = None
) -> t.Optional[exp.Expression]:
    """Parse a (possibly dotted) UDF name with an optional parameter list."""
    this = self._parse_id_var()

    while self._match(TokenType.DOT):
        this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

    if not self._match(TokenType.L_PAREN):
        return this

    expressions = self._parse_csv(self._parse_function_parameter)
    self._match_r_paren()
    return self.expression(
        exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
    )

def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
    """Parse a character-set introducer (e.g. MySQL ``_utf8'abc'``)."""
    literal = self._parse_primary()
    if literal:
        return self.expression(exp.Introducer, this=token.text, expression=literal)

    return self.expression(exp.Identifier, this=token.text)
3839 def _parse_session_parameter(self) -> exp.SessionParameter: 3840 kind = None 3841 this = self._parse_id_var() or self._parse_primary() 3842 3843 if this and self._match(TokenType.DOT): 3844 kind = this.name 3845 this = self._parse_var() or self._parse_primary() 3846 3847 return self.expression(exp.SessionParameter, this=this, kind=kind) 3848 3849 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3850 index = self._index 3851 3852 if self._match(TokenType.L_PAREN): 3853 expressions = t.cast( 3854 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3855 ) 3856 3857 if not self._match(TokenType.R_PAREN): 3858 self._retreat(index) 3859 else: 3860 expressions = [self._parse_id_var()] 3861 3862 if self._match_set(self.LAMBDAS): 3863 return self.LAMBDAS[self._prev.token_type](self, expressions) 3864 3865 self._retreat(index) 3866 3867 this: t.Optional[exp.Expression] 3868 3869 if self._match(TokenType.DISTINCT): 3870 this = self.expression( 3871 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3872 ) 3873 else: 3874 this = self._parse_select_or_expression(alias=alias) 3875 3876 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3877 3878 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3879 index = self._index 3880 3881 if not self.errors: 3882 try: 3883 if self._parse_select(nested=True): 3884 return this 3885 except ParseError: 3886 pass 3887 finally: 3888 self.errors.clear() 3889 self._retreat(index) 3890 3891 if not self._match(TokenType.L_PAREN): 3892 return this 3893 3894 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3895 3896 self._match_r_paren() 3897 return self.expression(exp.Schema, this=this, expressions=args) 3898 3899 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3900 return self._parse_column_def(self._parse_field(any_token=True)) 3901 3902 def 
def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Parse the type and constraints of a column definition whose name is `this`."""
    # column defs are not really columns, they're identifiers
    if isinstance(this, exp.Column):
        this = this.this

    kind = self._parse_types(schema=True)

    if self._match_text_seq("FOR", "ORDINALITY"):
        return self.expression(exp.ColumnDef, this=this, ordinality=True)

    constraints: t.List[exp.Expression] = []

    if not kind and self._match(TokenType.ALIAS):
        # Computed column: ``name AS <expr> [PERSISTED] [NOT NULL]``.
        constraints.append(
            self.expression(
                exp.ComputedColumnConstraint,
                this=self._parse_conjunction(),
                persisted=self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
        )

    while True:
        constraint = self._parse_column_constraint()
        if not constraint:
            break
        constraints.append(constraint)

    if not kind and not constraints:
        # Plain identifier - no column definition here.
        return this

    return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

def _parse_auto_increment(
    self,
) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
    """Parse AUTO_INCREMENT, optionally with ``(start, increment)`` or START/INCREMENT."""
    start = None
    increment = None

    if self._match(TokenType.L_PAREN, advance=False):
        args = self._parse_wrapped_csv(self._parse_bitwise)
        start = seq_get(args, 0)
        increment = seq_get(args, 1)
    elif self._match_text_seq("START"):
        start = self._parse_bitwise()
        self._match_text_seq("INCREMENT")
        increment = self._parse_bitwise()

    if start and increment:
        return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

    return exp.AutoIncrementColumnConstraint()

def _parse_compress(self) -> exp.CompressColumnConstraint:
    """Parse a COMPRESS column constraint: a wrapped list or a single expression."""
    if self._match(TokenType.L_PAREN, advance=False):
        return self.expression(
            exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
        )

    return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

def _parse_generated_as_identity(
    self,
) -> (
    exp.GeneratedAsIdentityColumnConstraint
    | exp.ComputedColumnConstraint
    | exp.GeneratedAsRowColumnConstraint
):
    """Parse ``GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY | ROW | (<expr>)} ...``."""
    if self._match_text_seq("BY", "DEFAULT"):
        on_null = self._match_pair(TokenType.ON, TokenType.NULL)
        this = self.expression(
            exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
        )
    else:
        self._match_text_seq("ALWAYS")
        this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

    self._match(TokenType.ALIAS)

    if self._match_text_seq("ROW"):
        # ``GENERATED ... AS ROW {START | END} [HIDDEN]`` (system-versioned tables).
        start = self._match_text_seq("START")
        if not start:
            self._match(TokenType.END)
        hidden = self._match_text_seq("HIDDEN")
        return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

    identity = self._match_text_seq("IDENTITY")

    if self._match(TokenType.L_PAREN):
        if self._match(TokenType.START_WITH):
            this.set("start", self._parse_bitwise())
        if self._match_text_seq("INCREMENT", "BY"):
            this.set("increment", self._parse_bitwise())
        if self._match_text_seq("MINVALUE"):
            this.set("minvalue", self._parse_bitwise())
        if self._match_text_seq("MAXVALUE"):
            this.set("maxvalue", self._parse_bitwise())

        if self._match_text_seq("CYCLE"):
            this.set("cycle", True)
        elif self._match_text_seq("NO", "CYCLE"):
            this.set("cycle", False)

        if not identity:
            # ``GENERATED ... AS (<expr>)``: a computed expression, not an identity.
            this.set("expression", self._parse_bitwise())
        elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
            # Bare ``(start [, increment])`` shorthand.
            args = self._parse_csv(self._parse_bitwise)
            this.set("start", seq_get(args, 0))
            this.set("increment", seq_get(args, 1))

        self._match_r_paren()

    return this
self._match_text_seq("LENGTH") 4018 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4019 4020 def _parse_not_constraint( 4021 self, 4022 ) -> t.Optional[exp.Expression]: 4023 if self._match_text_seq("NULL"): 4024 return self.expression(exp.NotNullColumnConstraint) 4025 if self._match_text_seq("CASESPECIFIC"): 4026 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4027 if self._match_text_seq("FOR", "REPLICATION"): 4028 return self.expression(exp.NotForReplicationColumnConstraint) 4029 return None 4030 4031 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4032 if self._match(TokenType.CONSTRAINT): 4033 this = self._parse_id_var() 4034 else: 4035 this = None 4036 4037 if self._match_texts(self.CONSTRAINT_PARSERS): 4038 return self.expression( 4039 exp.ColumnConstraint, 4040 this=this, 4041 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4042 ) 4043 4044 return this 4045 4046 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4047 if not self._match(TokenType.CONSTRAINT): 4048 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4049 4050 this = self._parse_id_var() 4051 expressions = [] 4052 4053 while True: 4054 constraint = self._parse_unnamed_constraint() or self._parse_function() 4055 if not constraint: 4056 break 4057 expressions.append(constraint) 4058 4059 return self.expression(exp.Constraint, this=this, expressions=expressions) 4060 4061 def _parse_unnamed_constraint( 4062 self, constraints: t.Optional[t.Collection[str]] = None 4063 ) -> t.Optional[exp.Expression]: 4064 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4065 constraints or self.CONSTRAINT_PARSERS 4066 ): 4067 return None 4068 4069 constraint = self._prev.text.upper() 4070 if constraint not in self.CONSTRAINT_PARSERS: 4071 self.raise_error(f"No parser found for schema constraint {constraint}.") 4072 4073 return 
def _parse_unique(self) -> exp.UniqueColumnConstraint:
    """Parse a UNIQUE [KEY] constraint with an optional column list and index type."""
    self._match_text_seq("KEY")
    return self.expression(
        exp.UniqueColumnConstraint,
        this=self._parse_schema(self._parse_id_var(any_token=False)),
        index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
    )

def _parse_key_constraint_options(self) -> t.List[str]:
    """Collect trailing key-constraint options (ON <event> <action>, DEFERRABLE, ...)."""
    options = []
    while True:
        if not self._curr:
            break

        if self._match(TokenType.ON):
            action = None
            # The token after ON names the event (e.g. DELETE / UPDATE).
            on = self._advance_any() and self._prev.text

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match_text_seq("CASCADE"):
                action = "CASCADE"
            elif self._match_text_seq("RESTRICT"):
                action = "RESTRICT"
            elif self._match_pair(TokenType.SET, TokenType.NULL):
                action = "SET NULL"
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                action = "SET DEFAULT"
            else:
                self.raise_error("Invalid key constraint")

            options.append(f"ON {on} {action}")
        elif self._match_text_seq("NOT", "ENFORCED"):
            options.append("NOT ENFORCED")
        elif self._match_text_seq("DEFERRABLE"):
            options.append("DEFERRABLE")
        elif self._match_text_seq("INITIALLY", "DEFERRED"):
            options.append("INITIALLY DEFERRED")
        elif self._match_text_seq("NORELY"):
            options.append("NORELY")
        elif self._match_text_seq("MATCH", "FULL"):
            options.append("MATCH FULL")
        else:
            break

    return options

def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
    """Parse a REFERENCES clause; `match` controls whether the keyword is required."""
    if match and not self._match(TokenType.REFERENCES):
        return None

    expressions = None
    this = self._parse_table(schema=True)
    options = self._parse_key_constraint_options()
    return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

def _parse_foreign_key(self) -> exp.ForeignKey:
    """Parse a FOREIGN KEY constraint body, including ON DELETE/UPDATE actions."""
    expressions = self._parse_wrapped_id_vars()
    reference = self._parse_references()
    options = {}

    while self._match(TokenType.ON):
        if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
            self.raise_error("Expected DELETE or UPDATE")

        kind = self._prev.text.lower()

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match(TokenType.SET):
            self._match_set((TokenType.NULL, TokenType.DEFAULT))
            action = "SET " + self._prev.text.upper()
        else:
            # Any other single token (e.g. CASCADE / RESTRICT) is taken verbatim.
            self._advance()
            action = self._prev.text.upper()

        options[kind] = action

    return self.expression(
        exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
    )

def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
    """Parse one element of a PRIMARY KEY column list."""
    return self._parse_field()

def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint:
    """Parse ``PERIOD FOR SYSTEM_TIME (start_col, end_col)`` (T-SQL temporal tables)."""
    self._match(TokenType.TIMESTAMP_SNAPSHOT)

    id_vars = self._parse_wrapped_id_vars()
    return self.expression(
        exp.PeriodForSystemTimeConstraint,
        this=seq_get(id_vars, 0),
        expression=seq_get(id_vars, 1),
    )

def _parse_primary_key(
    self, wrapped_optional: bool = False, in_props: bool = False
) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
    """Parse PRIMARY KEY, as either a column constraint or a table-level key list."""
    desc = (
        self._match_set((TokenType.ASC, TokenType.DESC))
        and self._prev.token_type == TokenType.DESC
    )

    if not in_props and not self._match(TokenType.L_PAREN, advance=False):
        # No column list follows: this is a column-level constraint.
        return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

    expressions = self._parse_wrapped_csv(
        self._parse_primary_key_part, optional=wrapped_optional
    )
    options = self._parse_key_constraint_options()
    return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Parse ``[...]`` / ``{...}`` after `this`: a subscript, slice, array or struct."""
    if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
        return this

    bracket_kind = self._prev.token_type

    if self._match(TokenType.COLON):
        # Open-ended slice ``[:expr]``.
        expressions: t.List[exp.Expression] = [
            self.expression(exp.Slice, expression=self._parse_conjunction())
        ]
    else:
        expressions = self._parse_csv(
            lambda: self._parse_slice(
                self._parse_alias(self._parse_conjunction(), explicit=True)
            )
        )

    if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
        self.raise_error("Expected ]")
    elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
        self.raise_error("Expected }")

    # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
    if bracket_kind == TokenType.L_BRACE:
        this = self.expression(exp.Struct, expressions=expressions)
    elif not this or this.name.upper() == "ARRAY":
        this = self.expression(exp.Array, expressions=expressions)
    else:
        # Subscript access: normalize indices for the dialect's index offset.
        expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
        this = self.expression(exp.Bracket, this=this, expressions=expressions)

    self._add_comments(this)
    # Chained subscripts, e.g. ``x[0][1]``.
    return self._parse_bracket(this)

def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Extend `this` into a slice if a ``:`` follows."""
    if self._match(TokenType.COLON):
        return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
    return this

def _parse_case(self) -> t.Optional[exp.Expression]:
    """Parse a CASE expression (both the simple and the searched forms)."""
    ifs = []
    default = None

    comments = self._prev_comments
    # Operand of a simple CASE; None for the searched form.
    expression = self._parse_conjunction()

    while self._match(TokenType.WHEN):
        this = self._parse_conjunction()
        self._match(TokenType.THEN)
        then = self._parse_conjunction()
        ifs.append(self.expression(exp.If, this=this, true=then))

    if self._match(TokenType.ELSE):
        default = self._parse_conjunction()

    if not self._match(TokenType.END):
        self.raise_error("Expected END after CASE", self._prev)

    return self._parse_window(
        self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
    )

def _parse_if(self) -> t.Optional[exp.Expression]:
    """Parse IF, either as a function call or as ``IF ... THEN ... [ELSE ...] END``."""
    if self._match(TokenType.L_PAREN):
        args = self._parse_csv(self._parse_conjunction)
        this = self.validate_expression(exp.If.from_arg_list(args), args)
        self._match_r_paren()
    else:
        index = self._index - 1
        condition = self._parse_conjunction()

        if not condition:
            # Not an IF expression after all; give the IF token back.
            self._retreat(index)
            return None

        self._match(TokenType.THEN)
        true = self._parse_conjunction()
        false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
        self._match(TokenType.END)
        this = self.expression(exp.If, this=condition, true=true, false=false)

    return self._parse_window(this)

def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
    """Parse ``NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]``."""
    if not self._match_text_seq("VALUE", "FOR"):
        # Just the NEXT keyword on its own; give it back.
        self._retreat(self._index - 1)
        return None

    return self.expression(
        exp.NextValueFor,
        this=self._parse_column(),
        order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
    )

def _parse_extract(self) -> exp.Extract:
    """Parse EXTRACT(part FROM expr); a comma is tolerated in place of FROM."""
    this = self._parse_function() or self._parse_var() or self._parse_type()

    if self._match(TokenType.FROM):
        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    if not self._match(TokenType.COMMA):
        self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

    return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

def _parse_any_value(self) -> exp.AnyValue:
    """Parse ANY_VALUE(expr [HAVING {MAX | MIN} expr])."""
    this = self._parse_lambda()
    is_max = None
    having = None

    if self._match(TokenType.HAVING):
        self._match_texts(("MAX", "MIN"))
        is_max = self._prev.text == "MAX"
        having = self._parse_column()

    return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
    """Parse the body of CAST/TRY_CAST: ``<expr> AS <type> [FORMAT <fmt>]``.

    Args:
        strict: build exp.Cast when True, exp.TryCast otherwise.
        safe: propagated to the resulting expression's ``safe`` arg.
    """
    this = self._parse_conjunction()

    if not self._match(TokenType.ALIAS):
        if self._match(TokenType.COMMA):
            # Two-argument form: cast to the type described by a string.
            return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

        self.raise_error("Expected AS after CAST")

    fmt = None
    to = self._parse_types()

    if self._match(TokenType.FORMAT):
        fmt_string = self._parse_string()
        fmt = self._parse_at_time_zone(fmt_string)

        if not to:
            to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
        if to.this in exp.DataType.TEMPORAL_TYPES:
            # A FORMAT on a temporal cast becomes STR_TO_DATE / STR_TO_TIME
            # with the format string translated to the dialect's time mapping.
            this = self.expression(
                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                this=this,
                format=exp.Literal.string(
                    format_time(
                        fmt_string.this if fmt_string else "",
                        self.FORMAT_MAPPING or self.TIME_MAPPING,
                        self.FORMAT_TRIE or self.TIME_TRIE,
                    )
                ),
            )

            if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                this.set("zone", fmt.args["zone"])
            return this
    elif not to:
        self.raise_error("Expected TYPE after CAST")
    elif isinstance(to, exp.Identifier):
        # User-defined type.
        to = exp.DataType.build(to.name, udt=True)
    elif to.this == exp.DataType.Type.CHAR:
        if self._match(TokenType.CHARACTER_SET):
            to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

    return self.expression(
        exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
    )
def _parse_concat(self) -> t.Optional[exp.Expression]:
    """Parse CONCAT arguments, normalizing NULL handling per dialect settings."""
    args = self._parse_csv(self._parse_conjunction)
    if self.CONCAT_NULL_OUTPUTS_STRING:
        args = self._ensure_string_if_null(args)

    # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
    # we find such a call we replace it with its argument.
    if len(args) == 1:
        return args[0]

    return self.expression(
        exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
    )

def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
    """Parse CONCAT_WS(delimiter, value, ...)."""
    args = self._parse_csv(self._parse_conjunction)
    if len(args) < 2:
        return self.expression(exp.ConcatWs, expressions=args)
    delim, *values = args
    if self.CONCAT_NULL_OUTPUTS_STRING:
        # Only the values are coerced; the delimiter is left untouched.
        values = self._ensure_string_if_null(values)

    return self.expression(exp.ConcatWs, expressions=[delim] + values)

def _parse_string_agg(self) -> exp.Expression:
    """Parse STRING_AGG / GROUP_CONCAT, including the WITHIN GROUP (ORDER BY) form."""
    if self._match(TokenType.DISTINCT):
        args: t.List[t.Optional[exp.Expression]] = [
            self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
        ]
        if self._match(TokenType.COMMA):
            args.extend(self._parse_csv(self._parse_conjunction))
    else:
        args = self._parse_csv(self._parse_conjunction)  # type: ignore

    index = self._index
    if not self._match(TokenType.R_PAREN) and args:
        # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
        # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
        args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
        return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

    # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
    # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
    # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
    if not self._match_text_seq("WITHIN", "GROUP"):
        self._retreat(index)
        return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

    self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
    order = self._parse_order(this=seq_get(args, 0))
    return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

def _parse_convert(
    self, strict: bool, safe: t.Optional[bool] = None
) -> t.Optional[exp.Expression]:
    """Parse CONVERT(expr USING charset) or CONVERT(expr, type)."""
    this = self._parse_bitwise()

    if self._match(TokenType.USING):
        to: t.Optional[exp.Expression] = self.expression(
            exp.CharacterSet, this=self._parse_var()
        )
    elif self._match(TokenType.COMMA):
        to = self._parse_types()
    else:
        to = None

    return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
4428 """ 4429 args = self._parse_csv(self._parse_conjunction) 4430 4431 if len(args) < 3: 4432 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4433 4434 expression, *expressions = args 4435 if not expression: 4436 return None 4437 4438 ifs = [] 4439 for search, result in zip(expressions[::2], expressions[1::2]): 4440 if not search or not result: 4441 return None 4442 4443 if isinstance(search, exp.Literal): 4444 ifs.append( 4445 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4446 ) 4447 elif isinstance(search, exp.Null): 4448 ifs.append( 4449 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4450 ) 4451 else: 4452 cond = exp.or_( 4453 exp.EQ(this=expression.copy(), expression=search), 4454 exp.and_( 4455 exp.Is(this=expression.copy(), expression=exp.Null()), 4456 exp.Is(this=search.copy(), expression=exp.Null()), 4457 copy=False, 4458 ), 4459 copy=False, 4460 ) 4461 ifs.append(exp.If(this=cond, true=result)) 4462 4463 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4464 4465 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4466 self._match_text_seq("KEY") 4467 key = self._parse_column() 4468 self._match_set((TokenType.COLON, TokenType.COMMA)) 4469 self._match_text_seq("VALUE") 4470 value = self._parse_bitwise() 4471 4472 if not key and not value: 4473 return None 4474 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4475 4476 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4477 if not this or not self._match_text_seq("FORMAT", "JSON"): 4478 return this 4479 4480 return self.expression(exp.FormatJson, this=this) 4481 4482 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4483 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    # Parses JSON_OBJECT(...): a lone star or KEY:VALUE pairs, then the optional
    # {NULL | ABSENT} ON NULL, WITH/WITHOUT UNIQUE [KEYS], RETURNING and ENCODING clauses.
    def _parse_json_object(self) -> exp.JSONObject:
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # Tri-state: True = WITH UNIQUE, False = WITHOUT UNIQUE, None = unspecified
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        # The KEYS keyword is optional filler after WITH/WITHOUT UNIQUE
        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    # Parses a COLUMNS (...) schema clause, e.g. inside JSON_TABLE or a NESTED column.
    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    # Parses JSON_TABLE(json_doc [, path] [ERROR/NULL ON ERROR] [ERROR/NULL ON EMPTY] COLUMNS (...)).
    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    # Parses LOG(...); argument order and 1-arg semantics depend on dialect flags.
    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        # A single argument means LN in dialects where LOG defaults to natural log
        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    # Parses MySQL's MATCH (col, ...) AGAINST ('expr' [modifier]) full-text predicate.
    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        # One "name type [path] [AS JSON]" entry of the WITH (...) column list
        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    # Parses POSITION/STRPOS-style calls; supports both the comma form and "needle IN haystack".
    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    # Parses ML.PREDICT(MODEL model, TABLE table [, params]) style calls.
    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    # Parses the table list of a join hint, e.g. BROADCAST(a, b).
    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        # Optional LEADING / TRAILING / BOTH qualifier
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string) puts the pattern first; some dialects do the
            # same with the comma form, hence the swap below.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    # Parses the WINDOW clause of a SELECT: WINDOW w AS (...), ...
    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    # One "name AS (window spec)" entry of a WINDOW clause.
    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    # Wraps `this` in IgnoreNulls/RespectNulls if either modifier follows; no-op otherwise.
    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    # Parses everything that can trail a function call in a window context:
    # FILTER (...), WITHIN GROUP (...), IGNORE/RESPECT NULLS and OVER (...).
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name: a reference to a named window, no inline spec
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # Oracle FIRST/LAST ordering inside KEEP (DENSE_RANK FIRST|LAST ...)
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    # PARTITION BY ... and ORDER BY ... inside a window spec; overridable by dialects.
    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    # One frame endpoint: UNBOUNDED / CURRENT ROW / <expr>, plus PRECEDING/FOLLOWING side.
    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    # Parses [AS] alias or [AS] (a, b, ...) after `this`; `explicit` requires the AS keyword.
    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    # Parses an identifier or an identifier-like token (keywords usable as names).
    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    # Parses a string literal, falling back to a placeholder.
    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    # Parses a string literal and reinterprets it as a quoted identifier.
    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    # Parses a numeric literal, falling back to a placeholder.
    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    # Parses a quoted identifier token, falling back to a placeholder.
    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    # Parses a bare variable/keyword name into a Var node.
    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    # Consumes the current token unless it is a reserved keyword; returns it if consumed.
    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    # Parses a NULL literal, falling back to a placeholder.
    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    # Parses TRUE/FALSE literals, falling back to a placeholder.
    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    # Parses a star (*) projection, falling back to a placeholder.
    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    # Parses a session/user parameter reference such as @x or ${x}, with an
    # optional ":" separated second part.
    def _parse_parameter(self) -> exp.Parameter:
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    # Tries the dialect's placeholder parsers; rewinds the cursor on failure.
    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    # Parses EXCEPT (col, ...) or EXCEPT col in a star projection.
    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    # Parses REPLACE (expr, ...) or REPLACE expr in a star projection.
    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    # Parses a `sep`-separated list using `parse_method`; None results are dropped.
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments found on the separator to the preceding item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    # Left-associative fold: parses operands with `parse_method`, combining them
    # with the binary node mapped from each matched operator token.
    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    # Parses a parenthesized CSV list; parentheses may be optional.
    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    # Runs `parse_method` inside (...) — required unless `optional` is True.
    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    # Parses either a full SELECT or a scalar expression (with set operations).
    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    # Parses the SELECT used in DDL statements like CREATE TABLE ... AS.
    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    # Parses BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...].
    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # A single mode may span several VAR tokens, e.g. "ISOLATION LEVEL ..."
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    # Parses COMMIT / ROLLBACK [TO SAVEPOINT x] [AND [NO] CHAIN].
    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword was consumed by the caller
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    # Parses REFRESH [TABLE] <name-or-string>.
    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    # Parses ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST|AFTER col].
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    # Parses ALTER TABLE ... DROP [COLUMN] — defaults the drop kind to COLUMN.
    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    # Parses one ADD CONSTRAINT / ADD FOREIGN KEY / ADD PRIMARY KEY action.
    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        # The triggering token was already consumed by the caller
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    # Dispatches ALTER TABLE ... ADD to constraint or column parsing.
    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    # Parses ALTER TABLE ... ALTER [COLUMN] col {DROP|SET DEFAULT | [SET DATA] TYPE ...}.
    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    # Dispatches ALTER TABLE ... DROP to partition or column parsing.
    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    # Parses ALTER TABLE ... RENAME TO <table>.
    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    # Parses ALTER TABLE; anything unrecognized is preserved as a raw Command.
    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable if the whole statement was consumed
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    # Parses MERGE INTO target USING source ON condition WHEN ... clauses.
    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    # Parses the WHEN [NOT] MATCHED [BY SOURCE|TARGET] [AND cond] THEN ... clauses of MERGE.
    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False for BY TARGET or when neither is given
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    # Parses SHOW statements via the dialect's SHOW parsers, else a raw Command.
    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    # Parses one "name = value" / "name TO value" item of a SET statement.
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    # Parses SET TRANSACTION <characteristic, ...>.
    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    # Parses one SET item via the dialect's SET parsers, else as a plain assignment.
    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    # Parses SET [items]; falls back to a raw Command if anything is left over.
    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    # Matches one of the given (possibly multi-word) options, returning it as a Var.
    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    # Consumes the rest of the statement verbatim as an opaque Command node.
    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    # Parses a ClickHouse-style dictionary property: KIND([key value, ...]).
    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    # Parses a ClickHouse RANGE(MIN x MAX y) / RANGE(MAX y) dictionary layout.
    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            # MIN defaults to 0 when only MAX is given
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    # Parses a comprehension tail: <this> FOR x IN y [IF cond]; rewinds if no IN follows.
    def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    # Walks the keyword trie to find the longest matching multi-word parser key;
    # rewinds the cursor and returns None if no full key matches.
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    # Consumes the current token if it has the given type; returns True on match.
    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    # Consumes the current token if its type is in `types`; returns True on match.
    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    # Consumes the next two tokens if they have the given types, in order.
    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    # Consumes a required "(" or raises a parse error.
    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    # Consumes a required ")" or raises a parse error.
    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    # Consumes the current token if its upper-cased text is in `texts`.
    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    # Consumes the exact sequence of upper-cased token texts; rewinds fully on any miss.
    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    # Recursively rewrites Column nodes as Dot chains (table.column -> Dot(table, column)).
    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    # Rewrites columns that reference lambda parameters so they aren't qualified
    # as real table columns (e.g. x -> x.y stays a dot access on the lambda var).
    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    # Wraps each value in COALESCE(CAST(value AS TEXT), '') so NULLs become ''.
    def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
        return [
            exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
            for value in values
            if value
        ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Args:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
971 def __init__( 972 self, 973 error_level: t.Optional[ErrorLevel] = None, 974 error_message_context: int = 100, 975 max_errors: int = 3, 976 ): 977 self.error_level = error_level or ErrorLevel.IMMEDIATE 978 self.error_message_context = error_message_context 979 self.max_errors = max_errors 980 self._tokenizer = self.TOKENIZER_CLASS() 981 self.reset()
993 def parse( 994 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 995 ) -> t.List[t.Optional[exp.Expression]]: 996 """ 997 Parses a list of tokens and returns a list of syntax trees, one tree 998 per parsed SQL statement. 999 1000 Args: 1001 raw_tokens: The list of tokens. 1002 sql: The original SQL string, used to produce helpful debug messages. 1003 1004 Returns: 1005 The list of the produced syntax trees. 1006 """ 1007 return self._parse( 1008 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1009 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Args:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1011 def parse_into( 1012 self, 1013 expression_types: exp.IntoType, 1014 raw_tokens: t.List[Token], 1015 sql: t.Optional[str] = None, 1016 ) -> t.List[t.Optional[exp.Expression]]: 1017 """ 1018 Parses a list of tokens into a given Expression type. If a collection of Expression 1019 types is given instead, this method will try to parse the token list into each one 1020 of them, stopping at the first for which the parsing succeeds. 1021 1022 Args: 1023 expression_types: The expression type(s) to try and parse the token list into. 1024 raw_tokens: The list of tokens. 1025 sql: The original SQL string, used to produce helpful debug messages. 1026 1027 Returns: 1028 The target Expression. 1029 """ 1030 errors = [] 1031 for expression_type in ensure_list(expression_types): 1032 parser = self.EXPRESSION_PARSERS.get(expression_type) 1033 if not parser: 1034 raise TypeError(f"No parser registered for {expression_type}") 1035 1036 try: 1037 return self._parse(parser, raw_tokens, sql) 1038 except ParseError as e: 1039 e.errors[0]["into_expression"] = expression_type 1040 errors.append(e) 1041 1042 raise ParseError( 1043 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1044 errors=merge_errors(errors), 1045 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Args:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1082 def check_errors(self) -> None: 1083 """Logs or raises any found errors, depending on the chosen error level setting.""" 1084 if self.error_level == ErrorLevel.WARN: 1085 for error in self.errors: 1086 logger.error(str(error)) 1087 elif self.error_level == ErrorLevel.RAISE and self.errors: 1088 raise ParseError( 1089 concat_messages(self.errors, self.max_errors), 1090 errors=merge_errors(self.errors), 1091 )
Logs or raises any found errors, depending on the chosen error level setting.
1093 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1094 """ 1095 Appends an error in the list of recorded errors or raises it, depending on the chosen 1096 error level setting. 1097 """ 1098 token = token or self._curr or self._prev or Token.string("") 1099 start = token.start 1100 end = token.end + 1 1101 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1102 highlight = self.sql[start:end] 1103 end_context = self.sql[end : end + self.error_message_context] 1104 1105 error = ParseError.new( 1106 f"{message}. Line {token.line}, Col: {token.col}.\n" 1107 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1108 description=message, 1109 line=token.line, 1110 col=token.col, 1111 start_context=start_context, 1112 highlight=highlight, 1113 end_context=end_context, 1114 ) 1115 1116 if self.error_level == ErrorLevel.IMMEDIATE: 1117 raise error 1118 1119 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1121 def expression( 1122 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1123 ) -> E: 1124 """ 1125 Creates a new, validated Expression. 1126 1127 Args: 1128 exp_class: The expression class to instantiate. 1129 comments: An optional list of comments to attach to the expression. 1130 kwargs: The arguments to set for the expression along with their respective values. 1131 1132 Returns: 1133 The target expression. 1134 """ 1135 instance = exp_class(**kwargs) 1136 instance.add_comments(comments) if comments else self._add_comments(instance) 1137 return self.validate_expression(instance)
Creates a new, validated Expression.
Args:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1144 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1145 """ 1146 Validates an Expression, making sure that all its mandatory arguments are set. 1147 1148 Args: 1149 expression: The expression to validate. 1150 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1151 1152 Returns: 1153 The validated expression. 1154 """ 1155 if self.error_level != ErrorLevel.IGNORE: 1156 for error_message in expression.error_messages(args): 1157 self.raise_error(error_message) 1158 1159 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Args:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.