# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 
TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.MEDIUMINT, 140 TokenType.UMEDIUMINT, 141 TokenType.FIXEDSTRING, 142 TokenType.FLOAT, 143 TokenType.DOUBLE, 144 TokenType.CHAR, 145 TokenType.NCHAR, 146 TokenType.VARCHAR, 147 TokenType.NVARCHAR, 148 TokenType.TEXT, 149 TokenType.MEDIUMTEXT, 150 TokenType.LONGTEXT, 151 TokenType.MEDIUMBLOB, 152 TokenType.LONGBLOB, 153 TokenType.BINARY, 154 TokenType.VARBINARY, 155 TokenType.JSON, 156 TokenType.JSONB, 157 TokenType.INTERVAL, 158 TokenType.TINYBLOB, 159 TokenType.TINYTEXT, 160 TokenType.TIME, 161 TokenType.TIMETZ, 162 TokenType.TIMESTAMP, 163 TokenType.TIMESTAMP_S, 164 TokenType.TIMESTAMP_MS, 165 TokenType.TIMESTAMP_NS, 166 TokenType.TIMESTAMPTZ, 167 TokenType.TIMESTAMPLTZ, 168 TokenType.DATETIME, 169 TokenType.DATETIME64, 170 TokenType.DATE, 171 TokenType.INT4RANGE, 172 TokenType.INT4MULTIRANGE, 173 TokenType.INT8RANGE, 174 TokenType.INT8MULTIRANGE, 175 TokenType.NUMRANGE, 176 TokenType.NUMMULTIRANGE, 177 TokenType.TSRANGE, 178 TokenType.TSMULTIRANGE, 179 TokenType.TSTZRANGE, 180 TokenType.TSTZMULTIRANGE, 181 TokenType.DATERANGE, 182 TokenType.DATEMULTIRANGE, 183 TokenType.DECIMAL, 184 TokenType.UDECIMAL, 185 TokenType.BIGDECIMAL, 186 TokenType.UUID, 187 TokenType.GEOGRAPHY, 188 TokenType.GEOMETRY, 189 TokenType.HLLSKETCH, 190 TokenType.HSTORE, 191 TokenType.PSEUDO_TYPE, 192 TokenType.SUPER, 193 TokenType.SERIAL, 194 TokenType.SMALLSERIAL, 195 TokenType.BIGSERIAL, 196 TokenType.XML, 197 TokenType.YEAR, 198 TokenType.UNIQUEIDENTIFIER, 199 TokenType.USERDEFINED, 200 TokenType.MONEY, 201 TokenType.SMALLMONEY, 202 TokenType.ROWVERSION, 203 TokenType.IMAGE, 204 TokenType.VARIANT, 205 TokenType.OBJECT, 206 TokenType.OBJECT_IDENTIFIER, 207 TokenType.INET, 208 TokenType.IPADDRESS, 209 TokenType.IPPREFIX, 210 TokenType.UNKNOWN, 211 TokenType.NULL, 212 *ENUM_TYPE_TOKENS, 213 *NESTED_TYPE_TOKENS, 214 } 215 216 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 217 TokenType.BIGINT: TokenType.UBIGINT, 218 
TokenType.INT: TokenType.UINT, 219 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 220 TokenType.SMALLINT: TokenType.USMALLINT, 221 TokenType.TINYINT: TokenType.UTINYINT, 222 TokenType.DECIMAL: TokenType.UDECIMAL, 223 } 224 225 SUBQUERY_PREDICATES = { 226 TokenType.ANY: exp.Any, 227 TokenType.ALL: exp.All, 228 TokenType.EXISTS: exp.Exists, 229 TokenType.SOME: exp.Any, 230 } 231 232 RESERVED_KEYWORDS = { 233 *Tokenizer.SINGLE_TOKENS.values(), 234 TokenType.SELECT, 235 } 236 237 DB_CREATABLES = { 238 TokenType.DATABASE, 239 TokenType.SCHEMA, 240 TokenType.TABLE, 241 TokenType.VIEW, 242 TokenType.MODEL, 243 TokenType.DICTIONARY, 244 } 245 246 CREATABLES = { 247 TokenType.COLUMN, 248 TokenType.FUNCTION, 249 TokenType.INDEX, 250 TokenType.PROCEDURE, 251 *DB_CREATABLES, 252 } 253 254 # Tokens that can represent identifiers 255 ID_VAR_TOKENS = { 256 TokenType.VAR, 257 TokenType.ANTI, 258 TokenType.APPLY, 259 TokenType.ASC, 260 TokenType.AUTO_INCREMENT, 261 TokenType.BEGIN, 262 TokenType.CACHE, 263 TokenType.CASE, 264 TokenType.COLLATE, 265 TokenType.COMMAND, 266 TokenType.COMMENT, 267 TokenType.COMMIT, 268 TokenType.CONSTRAINT, 269 TokenType.DEFAULT, 270 TokenType.DELETE, 271 TokenType.DESC, 272 TokenType.DESCRIBE, 273 TokenType.DICTIONARY, 274 TokenType.DIV, 275 TokenType.END, 276 TokenType.EXECUTE, 277 TokenType.ESCAPE, 278 TokenType.FALSE, 279 TokenType.FIRST, 280 TokenType.FILTER, 281 TokenType.FORMAT, 282 TokenType.FULL, 283 TokenType.IS, 284 TokenType.ISNULL, 285 TokenType.INTERVAL, 286 TokenType.KEEP, 287 TokenType.KILL, 288 TokenType.LEFT, 289 TokenType.LOAD, 290 TokenType.MERGE, 291 TokenType.NATURAL, 292 TokenType.NEXT, 293 TokenType.OFFSET, 294 TokenType.ORDINALITY, 295 TokenType.OVERLAPS, 296 TokenType.OVERWRITE, 297 TokenType.PARTITION, 298 TokenType.PERCENT, 299 TokenType.PIVOT, 300 TokenType.PRAGMA, 301 TokenType.RANGE, 302 TokenType.REFERENCES, 303 TokenType.RIGHT, 304 TokenType.ROW, 305 TokenType.ROWS, 306 TokenType.SEMI, 307 TokenType.SET, 308 
TokenType.SETTINGS, 309 TokenType.SHOW, 310 TokenType.TEMPORARY, 311 TokenType.TOP, 312 TokenType.TRUE, 313 TokenType.UNIQUE, 314 TokenType.UNPIVOT, 315 TokenType.UPDATE, 316 TokenType.USE, 317 TokenType.VOLATILE, 318 TokenType.WINDOW, 319 *CREATABLES, 320 *SUBQUERY_PREDICATES, 321 *TYPE_TOKENS, 322 *NO_PAREN_FUNCTIONS, 323 } 324 325 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 326 327 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 328 TokenType.ANTI, 329 TokenType.APPLY, 330 TokenType.ASOF, 331 TokenType.FULL, 332 TokenType.LEFT, 333 TokenType.LOCK, 334 TokenType.NATURAL, 335 TokenType.OFFSET, 336 TokenType.RIGHT, 337 TokenType.SEMI, 338 TokenType.WINDOW, 339 } 340 341 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 342 343 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 344 345 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 346 347 FUNC_TOKENS = { 348 TokenType.COLLATE, 349 TokenType.COMMAND, 350 TokenType.CURRENT_DATE, 351 TokenType.CURRENT_DATETIME, 352 TokenType.CURRENT_TIMESTAMP, 353 TokenType.CURRENT_TIME, 354 TokenType.CURRENT_USER, 355 TokenType.FILTER, 356 TokenType.FIRST, 357 TokenType.FORMAT, 358 TokenType.GLOB, 359 TokenType.IDENTIFIER, 360 TokenType.INDEX, 361 TokenType.ISNULL, 362 TokenType.ILIKE, 363 TokenType.INSERT, 364 TokenType.LIKE, 365 TokenType.MERGE, 366 TokenType.OFFSET, 367 TokenType.PRIMARY_KEY, 368 TokenType.RANGE, 369 TokenType.REPLACE, 370 TokenType.RLIKE, 371 TokenType.ROW, 372 TokenType.UNNEST, 373 TokenType.VAR, 374 TokenType.LEFT, 375 TokenType.RIGHT, 376 TokenType.DATE, 377 TokenType.DATETIME, 378 TokenType.TABLE, 379 TokenType.TIMESTAMP, 380 TokenType.TIMESTAMPTZ, 381 TokenType.WINDOW, 382 TokenType.XOR, 383 *TYPE_TOKENS, 384 *SUBQUERY_PREDICATES, 385 } 386 387 CONJUNCTION = { 388 TokenType.AND: exp.And, 389 TokenType.OR: exp.Or, 390 } 391 392 EQUALITY = { 393 TokenType.EQ: exp.EQ, 394 TokenType.NEQ: exp.NEQ, 395 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 396 } 397 398 COMPARISON = { 399 TokenType.GT: exp.GT, 400 
TokenType.GTE: exp.GTE, 401 TokenType.LT: exp.LT, 402 TokenType.LTE: exp.LTE, 403 } 404 405 BITWISE = { 406 TokenType.AMP: exp.BitwiseAnd, 407 TokenType.CARET: exp.BitwiseXor, 408 TokenType.PIPE: exp.BitwiseOr, 409 TokenType.DPIPE: exp.DPipe, 410 } 411 412 TERM = { 413 TokenType.DASH: exp.Sub, 414 TokenType.PLUS: exp.Add, 415 TokenType.MOD: exp.Mod, 416 TokenType.COLLATE: exp.Collate, 417 } 418 419 FACTOR = { 420 TokenType.DIV: exp.IntDiv, 421 TokenType.LR_ARROW: exp.Distance, 422 TokenType.SLASH: exp.Div, 423 TokenType.STAR: exp.Mul, 424 } 425 426 TIMES = { 427 TokenType.TIME, 428 TokenType.TIMETZ, 429 } 430 431 TIMESTAMPS = { 432 TokenType.TIMESTAMP, 433 TokenType.TIMESTAMPTZ, 434 TokenType.TIMESTAMPLTZ, 435 *TIMES, 436 } 437 438 SET_OPERATIONS = { 439 TokenType.UNION, 440 TokenType.INTERSECT, 441 TokenType.EXCEPT, 442 } 443 444 JOIN_METHODS = { 445 TokenType.NATURAL, 446 TokenType.ASOF, 447 } 448 449 JOIN_SIDES = { 450 TokenType.LEFT, 451 TokenType.RIGHT, 452 TokenType.FULL, 453 } 454 455 JOIN_KINDS = { 456 TokenType.INNER, 457 TokenType.OUTER, 458 TokenType.CROSS, 459 TokenType.SEMI, 460 TokenType.ANTI, 461 } 462 463 JOIN_HINTS: t.Set[str] = set() 464 465 LAMBDAS = { 466 TokenType.ARROW: lambda self, expressions: self.expression( 467 exp.Lambda, 468 this=self._replace_lambda( 469 self._parse_conjunction(), 470 {node.name for node in expressions}, 471 ), 472 expressions=expressions, 473 ), 474 TokenType.FARROW: lambda self, expressions: self.expression( 475 exp.Kwarg, 476 this=exp.var(expressions[0].name), 477 expression=self._parse_conjunction(), 478 ), 479 } 480 481 COLUMN_OPERATORS = { 482 TokenType.DOT: None, 483 TokenType.DCOLON: lambda self, this, to: self.expression( 484 exp.Cast if self.STRICT_CAST else exp.TryCast, 485 this=this, 486 to=to, 487 ), 488 TokenType.ARROW: lambda self, this, path: self.expression( 489 exp.JSONExtract, 490 this=this, 491 expression=path, 492 ), 493 TokenType.DARROW: lambda self, this, path: self.expression( 494 
exp.JSONExtractScalar, 495 this=this, 496 expression=path, 497 ), 498 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 499 exp.JSONBExtract, 500 this=this, 501 expression=path, 502 ), 503 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 504 exp.JSONBExtractScalar, 505 this=this, 506 expression=path, 507 ), 508 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 509 exp.JSONBContains, 510 this=this, 511 expression=key, 512 ), 513 } 514 515 EXPRESSION_PARSERS = { 516 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 517 exp.Column: lambda self: self._parse_column(), 518 exp.Condition: lambda self: self._parse_conjunction(), 519 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 520 exp.Expression: lambda self: self._parse_statement(), 521 exp.From: lambda self: self._parse_from(), 522 exp.Group: lambda self: self._parse_group(), 523 exp.Having: lambda self: self._parse_having(), 524 exp.Identifier: lambda self: self._parse_id_var(), 525 exp.Join: lambda self: self._parse_join(), 526 exp.Lambda: lambda self: self._parse_lambda(), 527 exp.Lateral: lambda self: self._parse_lateral(), 528 exp.Limit: lambda self: self._parse_limit(), 529 exp.Offset: lambda self: self._parse_offset(), 530 exp.Order: lambda self: self._parse_order(), 531 exp.Ordered: lambda self: self._parse_ordered(), 532 exp.Properties: lambda self: self._parse_properties(), 533 exp.Qualify: lambda self: self._parse_qualify(), 534 exp.Returning: lambda self: self._parse_returning(), 535 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 536 exp.Table: lambda self: self._parse_table_parts(), 537 exp.TableAlias: lambda self: self._parse_table_alias(), 538 exp.Where: lambda self: self._parse_where(), 539 exp.Window: lambda self: self._parse_named_window(), 540 exp.With: lambda self: self._parse_with(), 541 "JOIN_TYPE": lambda self: self._parse_join_parts(), 542 } 543 544 STATEMENT_PARSERS = { 545 
TokenType.ALTER: lambda self: self._parse_alter(), 546 TokenType.BEGIN: lambda self: self._parse_transaction(), 547 TokenType.CACHE: lambda self: self._parse_cache(), 548 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 549 TokenType.COMMENT: lambda self: self._parse_comment(), 550 TokenType.CREATE: lambda self: self._parse_create(), 551 TokenType.DELETE: lambda self: self._parse_delete(), 552 TokenType.DESC: lambda self: self._parse_describe(), 553 TokenType.DESCRIBE: lambda self: self._parse_describe(), 554 TokenType.DROP: lambda self: self._parse_drop(), 555 TokenType.INSERT: lambda self: self._parse_insert(), 556 TokenType.KILL: lambda self: self._parse_kill(), 557 TokenType.LOAD: lambda self: self._parse_load(), 558 TokenType.MERGE: lambda self: self._parse_merge(), 559 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 560 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 561 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 562 TokenType.SET: lambda self: self._parse_set(), 563 TokenType.UNCACHE: lambda self: self._parse_uncache(), 564 TokenType.UPDATE: lambda self: self._parse_update(), 565 TokenType.USE: lambda self: self.expression( 566 exp.Use, 567 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 568 and exp.var(self._prev.text), 569 this=self._parse_table(schema=False), 570 ), 571 } 572 573 UNARY_PARSERS = { 574 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 575 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 576 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 577 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 578 } 579 580 PRIMARY_PARSERS = { 581 TokenType.STRING: lambda self, token: self.expression( 582 exp.Literal, this=token.text, is_string=True 583 ), 584 TokenType.NUMBER: lambda self, token: 
self.expression( 585 exp.Literal, this=token.text, is_string=False 586 ), 587 TokenType.STAR: lambda self, _: self.expression( 588 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 589 ), 590 TokenType.NULL: lambda self, _: self.expression(exp.Null), 591 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 592 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 593 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 594 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 595 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 596 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 597 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 598 exp.National, this=token.text 599 ), 600 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 601 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 602 exp.RawString, this=token.text 603 ), 604 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 605 } 606 607 PLACEHOLDER_PARSERS = { 608 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 609 TokenType.PARAMETER: lambda self: self._parse_parameter(), 610 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 611 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 612 else None, 613 } 614 615 RANGE_PARSERS = { 616 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 617 TokenType.GLOB: binary_range_parser(exp.Glob), 618 TokenType.ILIKE: binary_range_parser(exp.ILike), 619 TokenType.IN: lambda self, this: self._parse_in(this), 620 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 621 TokenType.IS: lambda self, this: self._parse_is(this), 622 TokenType.LIKE: binary_range_parser(exp.Like), 623 
TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 624 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 625 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 626 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 627 } 628 629 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 630 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 631 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 632 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 633 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 634 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 635 "CHECKSUM": lambda self: self._parse_checksum(), 636 "CLUSTER BY": lambda self: self._parse_cluster(), 637 "CLUSTERED": lambda self: self._parse_clustered_by(), 638 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 639 exp.CollateProperty, **kwargs 640 ), 641 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 642 "COPY": lambda self: self._parse_copy_property(), 643 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 644 "DEFINER": lambda self: self._parse_definer(), 645 "DETERMINISTIC": lambda self: self.expression( 646 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 647 ), 648 "DISTKEY": lambda self: self._parse_distkey(), 649 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 650 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 651 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 652 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 653 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 654 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 655 "FREESPACE": lambda self: self._parse_freespace(), 656 "HEAP": lambda self: 
self.expression(exp.HeapProperty), 657 "IMMUTABLE": lambda self: self.expression( 658 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 659 ), 660 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 661 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 662 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 663 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 664 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 665 "LIKE": lambda self: self._parse_create_like(), 666 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 667 "LOCK": lambda self: self._parse_locking(), 668 "LOCKING": lambda self: self._parse_locking(), 669 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 670 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 671 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 672 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 673 "NO": lambda self: self._parse_no_property(), 674 "ON": lambda self: self._parse_on_property(), 675 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 676 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 677 "PARTITION": lambda self: self._parse_partitioned_of(), 678 "PARTITION BY": lambda self: self._parse_partitioned_by(), 679 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 680 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 681 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 682 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 683 "REMOTE": lambda self: self._parse_remote_with_connection(), 684 "RETURNS": lambda self: self._parse_returns(), 685 "ROW": lambda self: self._parse_row(), 686 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 687 "SAMPLE": lambda self: 
self.expression( 688 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 689 ), 690 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 691 "SETTINGS": lambda self: self.expression( 692 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 693 ), 694 "SORTKEY": lambda self: self._parse_sortkey(), 695 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 696 "STABLE": lambda self: self.expression( 697 exp.StabilityProperty, this=exp.Literal.string("STABLE") 698 ), 699 "STORED": lambda self: self._parse_stored(), 700 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 701 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 702 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 703 "TO": lambda self: self._parse_to_table(), 704 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 705 "TRANSFORM": lambda self: self.expression( 706 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 707 ), 708 "TTL": lambda self: self._parse_ttl(), 709 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 710 "VOLATILE": lambda self: self._parse_volatile_property(), 711 "WITH": lambda self: self._parse_with_property(), 712 } 713 714 CONSTRAINT_PARSERS = { 715 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 716 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 717 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 718 "CHARACTER SET": lambda self: self.expression( 719 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 720 ), 721 "CHECK": lambda self: self.expression( 722 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 723 ), 724 "COLLATE": lambda self: self.expression( 725 exp.CollateColumnConstraint, this=self._parse_var() 726 ), 727 "COMMENT": lambda self: self.expression( 728 
exp.CommentColumnConstraint, this=self._parse_string() 729 ), 730 "COMPRESS": lambda self: self._parse_compress(), 731 "CLUSTERED": lambda self: self.expression( 732 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 733 ), 734 "NONCLUSTERED": lambda self: self.expression( 735 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 736 ), 737 "DEFAULT": lambda self: self.expression( 738 exp.DefaultColumnConstraint, this=self._parse_bitwise() 739 ), 740 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 741 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 742 "FORMAT": lambda self: self.expression( 743 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 744 ), 745 "GENERATED": lambda self: self._parse_generated_as_identity(), 746 "IDENTITY": lambda self: self._parse_auto_increment(), 747 "INLINE": lambda self: self._parse_inline(), 748 "LIKE": lambda self: self._parse_create_like(), 749 "NOT": lambda self: self._parse_not_constraint(), 750 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 751 "ON": lambda self: ( 752 self._match(TokenType.UPDATE) 753 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 754 ) 755 or self.expression(exp.OnProperty, this=self._parse_id_var()), 756 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 757 "PRIMARY KEY": lambda self: self._parse_primary_key(), 758 "REFERENCES": lambda self: self._parse_references(match=False), 759 "TITLE": lambda self: self.expression( 760 exp.TitleColumnConstraint, this=self._parse_var_or_string() 761 ), 762 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 763 "UNIQUE": lambda self: self._parse_unique(), 764 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 765 "WITH": lambda self: self.expression( 766 exp.Properties, 
expressions=self._parse_wrapped_csv(self._parse_property) 767 ), 768 } 769 770 ALTER_PARSERS = { 771 "ADD": lambda self: self._parse_alter_table_add(), 772 "ALTER": lambda self: self._parse_alter_table_alter(), 773 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 774 "DROP": lambda self: self._parse_alter_table_drop(), 775 "RENAME": lambda self: self._parse_alter_table_rename(), 776 } 777 778 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 779 780 NO_PAREN_FUNCTION_PARSERS = { 781 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 782 "CASE": lambda self: self._parse_case(), 783 "IF": lambda self: self._parse_if(), 784 "NEXT": lambda self: self._parse_next_value_for(), 785 } 786 787 INVALID_FUNC_NAME_TOKENS = { 788 TokenType.IDENTIFIER, 789 TokenType.STRING, 790 } 791 792 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 793 794 FUNCTION_PARSERS = { 795 "ANY_VALUE": lambda self: self._parse_any_value(), 796 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 797 "CONCAT": lambda self: self._parse_concat(), 798 "CONCAT_WS": lambda self: self._parse_concat_ws(), 799 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 800 "DECODE": lambda self: self._parse_decode(), 801 "EXTRACT": lambda self: self._parse_extract(), 802 "JSON_OBJECT": lambda self: self._parse_json_object(), 803 "JSON_TABLE": lambda self: self._parse_json_table(), 804 "LOG": lambda self: self._parse_logarithm(), 805 "MATCH": lambda self: self._parse_match_against(), 806 "OPENJSON": lambda self: self._parse_open_json(), 807 "POSITION": lambda self: self._parse_position(), 808 "PREDICT": lambda self: self._parse_predict(), 809 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 810 "STRING_AGG": lambda self: self._parse_string_agg(), 811 "SUBSTRING": lambda self: self._parse_substring(), 812 "TRIM": lambda self: self._parse_trim(), 813 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 
814 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 815 } 816 817 QUERY_MODIFIER_PARSERS = { 818 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 819 TokenType.WHERE: lambda self: ("where", self._parse_where()), 820 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 821 TokenType.HAVING: lambda self: ("having", self._parse_having()), 822 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 823 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 824 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 825 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 826 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 827 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 828 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 829 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 830 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 831 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 832 TokenType.CLUSTER_BY: lambda self: ( 833 "cluster", 834 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 835 ), 836 TokenType.DISTRIBUTE_BY: lambda self: ( 837 "distribute", 838 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 839 ), 840 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 841 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 842 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 843 } 844 845 SET_PARSERS = { 846 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 847 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 848 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 849 "TRANSACTION": lambda self: self._parse_set_transaction(), 850 } 851 852 SHOW_PARSERS: t.Dict[str, 
t.Callable] = {} 853 854 TYPE_LITERAL_PARSERS = { 855 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 856 } 857 858 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 859 860 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 861 862 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 863 864 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 865 TRANSACTION_CHARACTERISTICS = { 866 "ISOLATION LEVEL REPEATABLE READ", 867 "ISOLATION LEVEL READ COMMITTED", 868 "ISOLATION LEVEL READ UNCOMMITTED", 869 "ISOLATION LEVEL SERIALIZABLE", 870 "READ WRITE", 871 "READ ONLY", 872 } 873 874 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 875 876 CLONE_KEYWORDS = {"CLONE", "COPY"} 877 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 878 879 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 880 881 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 882 883 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 884 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 885 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 886 887 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 888 889 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 890 891 DISTINCT_TOKENS = {TokenType.DISTINCT} 892 893 NULL_TOKENS = {TokenType.NULL} 894 895 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 896 897 STRICT_CAST = True 898 899 # A NULL arg in CONCAT yields NULL by default 900 CONCAT_NULL_OUTPUTS_STRING = False 901 902 PREFIXED_PIVOT_COLUMNS = False 903 IDENTIFY_PIVOT_STRINGS = False 904 905 LOG_BASE_FIRST = True 906 LOG_DEFAULTS_TO_LN = False 907 908 # Whether or not ADD is present for each column added by ALTER TABLE 909 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 910 911 # Whether or not the table sample clause expects CSV syntax 912 TABLESAMPLE_CSV = False 913 914 # Whether or 
not the SET command needs a delimiter (e.g. "=") for assignments 915 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 916 917 # Whether the TRIM function expects the characters to trim as its first argument 918 TRIM_PATTERN_FIRST = False 919 920 __slots__ = ( 921 "error_level", 922 "error_message_context", 923 "max_errors", 924 "sql", 925 "errors", 926 "_tokens", 927 "_index", 928 "_curr", 929 "_next", 930 "_prev", 931 "_prev_comments", 932 "_tokenizer", 933 ) 934 935 # Autofilled 936 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 937 INDEX_OFFSET: int = 0 938 UNNEST_COLUMN_ONLY: bool = False 939 ALIAS_POST_TABLESAMPLE: bool = False 940 STRICT_STRING_CONCAT = False 941 SUPPORTS_USER_DEFINED_TYPES = True 942 NORMALIZE_FUNCTIONS = "upper" 943 NULL_ORDERING: str = "nulls_are_small" 944 SHOW_TRIE: t.Dict = {} 945 SET_TRIE: t.Dict = {} 946 FORMAT_MAPPING: t.Dict[str, str] = {} 947 FORMAT_TRIE: t.Dict = {} 948 TIME_MAPPING: t.Dict[str, str] = {} 949 TIME_TRIE: t.Dict = {} 950 951 def __init__( 952 self, 953 error_level: t.Optional[ErrorLevel] = None, 954 error_message_context: int = 100, 955 max_errors: int = 3, 956 ): 957 self.error_level = error_level or ErrorLevel.IMMEDIATE 958 self.error_message_context = error_message_context 959 self.max_errors = max_errors 960 self._tokenizer = self.TOKENIZER_CLASS() 961 self.reset() 962 963 def reset(self): 964 self.sql = "" 965 self.errors = [] 966 self._tokens = [] 967 self._index = 0 968 self._curr = None 969 self._next = None 970 self._prev = None 971 self._prev_comments = None 972 973 def parse( 974 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 975 ) -> t.List[t.Optional[exp.Expression]]: 976 """ 977 Parses a list of tokens and returns a list of syntax trees, one tree 978 per parsed SQL statement. 979 980 Args: 981 raw_tokens: The list of tokens. 982 sql: The original SQL string, used to produce helpful debug messages. 983 984 Returns: 985 The list of the produced syntax trees. 
986 """ 987 return self._parse( 988 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 989 ) 990 991 def parse_into( 992 self, 993 expression_types: exp.IntoType, 994 raw_tokens: t.List[Token], 995 sql: t.Optional[str] = None, 996 ) -> t.List[t.Optional[exp.Expression]]: 997 """ 998 Parses a list of tokens into a given Expression type. If a collection of Expression 999 types is given instead, this method will try to parse the token list into each one 1000 of them, stopping at the first for which the parsing succeeds. 1001 1002 Args: 1003 expression_types: The expression type(s) to try and parse the token list into. 1004 raw_tokens: The list of tokens. 1005 sql: The original SQL string, used to produce helpful debug messages. 1006 1007 Returns: 1008 The target Expression. 1009 """ 1010 errors = [] 1011 for expression_type in ensure_list(expression_types): 1012 parser = self.EXPRESSION_PARSERS.get(expression_type) 1013 if not parser: 1014 raise TypeError(f"No parser registered for {expression_type}") 1015 1016 try: 1017 return self._parse(parser, raw_tokens, sql) 1018 except ParseError as e: 1019 e.errors[0]["into_expression"] = expression_type 1020 errors.append(e) 1021 1022 raise ParseError( 1023 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1024 errors=merge_errors(errors), 1025 ) from errors[-1] 1026 1027 def _parse( 1028 self, 1029 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1030 raw_tokens: t.List[Token], 1031 sql: t.Optional[str] = None, 1032 ) -> t.List[t.Optional[exp.Expression]]: 1033 self.reset() 1034 self.sql = sql or "" 1035 1036 total = len(raw_tokens) 1037 chunks: t.List[t.List[Token]] = [[]] 1038 1039 for i, token in enumerate(raw_tokens): 1040 if token.token_type == TokenType.SEMICOLON: 1041 if i < total - 1: 1042 chunks.append([]) 1043 else: 1044 chunks[-1].append(token) 1045 1046 expressions = [] 1047 1048 for tokens in chunks: 1049 self._index = -1 1050 self._tokens = tokens 1051 
self._advance() 1052 1053 expressions.append(parse_method(self)) 1054 1055 if self._index < len(self._tokens): 1056 self.raise_error("Invalid expression / Unexpected token") 1057 1058 self.check_errors() 1059 1060 return expressions 1061 1062 def check_errors(self) -> None: 1063 """Logs or raises any found errors, depending on the chosen error level setting.""" 1064 if self.error_level == ErrorLevel.WARN: 1065 for error in self.errors: 1066 logger.error(str(error)) 1067 elif self.error_level == ErrorLevel.RAISE and self.errors: 1068 raise ParseError( 1069 concat_messages(self.errors, self.max_errors), 1070 errors=merge_errors(self.errors), 1071 ) 1072 1073 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1074 """ 1075 Appends an error in the list of recorded errors or raises it, depending on the chosen 1076 error level setting. 1077 """ 1078 token = token or self._curr or self._prev or Token.string("") 1079 start = token.start 1080 end = token.end + 1 1081 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1082 highlight = self.sql[start:end] 1083 end_context = self.sql[end : end + self.error_message_context] 1084 1085 error = ParseError.new( 1086 f"{message}. Line {token.line}, Col: {token.col}.\n" 1087 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1088 description=message, 1089 line=token.line, 1090 col=token.col, 1091 start_context=start_context, 1092 highlight=highlight, 1093 end_context=end_context, 1094 ) 1095 1096 if self.error_level == ErrorLevel.IMMEDIATE: 1097 raise error 1098 1099 self.errors.append(error) 1100 1101 def expression( 1102 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1103 ) -> E: 1104 """ 1105 Creates a new, validated Expression. 1106 1107 Args: 1108 exp_class: The expression class to instantiate. 1109 comments: An optional list of comments to attach to the expression. 
1110 kwargs: The arguments to set for the expression along with their respective values. 1111 1112 Returns: 1113 The target expression. 1114 """ 1115 instance = exp_class(**kwargs) 1116 instance.add_comments(comments) if comments else self._add_comments(instance) 1117 return self.validate_expression(instance) 1118 1119 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1120 if expression and self._prev_comments: 1121 expression.add_comments(self._prev_comments) 1122 self._prev_comments = None 1123 1124 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1125 """ 1126 Validates an Expression, making sure that all its mandatory arguments are set. 1127 1128 Args: 1129 expression: The expression to validate. 1130 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1131 1132 Returns: 1133 The validated expression. 1134 """ 1135 if self.error_level != ErrorLevel.IGNORE: 1136 for error_message in expression.error_messages(args): 1137 self.raise_error(error_message) 1138 1139 return expression 1140 1141 def _find_sql(self, start: Token, end: Token) -> str: 1142 return self.sql[start.start : end.end + 1] 1143 1144 def _advance(self, times: int = 1) -> None: 1145 self._index += times 1146 self._curr = seq_get(self._tokens, self._index) 1147 self._next = seq_get(self._tokens, self._index + 1) 1148 1149 if self._index > 0: 1150 self._prev = self._tokens[self._index - 1] 1151 self._prev_comments = self._prev.comments 1152 else: 1153 self._prev = None 1154 self._prev_comments = None 1155 1156 def _retreat(self, index: int) -> None: 1157 if index != self._index: 1158 self._advance(index - self._index) 1159 1160 def _parse_command(self) -> exp.Command: 1161 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1162 1163 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1164 start = self._prev 1165 exists = self._parse_exists() if 
allow_exists else None 1166 1167 self._match(TokenType.ON) 1168 1169 kind = self._match_set(self.CREATABLES) and self._prev 1170 if not kind: 1171 return self._parse_as_command(start) 1172 1173 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1174 this = self._parse_user_defined_function(kind=kind.token_type) 1175 elif kind.token_type == TokenType.TABLE: 1176 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1177 elif kind.token_type == TokenType.COLUMN: 1178 this = self._parse_column() 1179 else: 1180 this = self._parse_id_var() 1181 1182 self._match(TokenType.IS) 1183 1184 return self.expression( 1185 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1186 ) 1187 1188 def _parse_to_table( 1189 self, 1190 ) -> exp.ToTableProperty: 1191 table = self._parse_table_parts(schema=True) 1192 return self.expression(exp.ToTableProperty, this=table) 1193 1194 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1195 def _parse_ttl(self) -> exp.Expression: 1196 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1197 this = self._parse_bitwise() 1198 1199 if self._match_text_seq("DELETE"): 1200 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1201 if self._match_text_seq("RECOMPRESS"): 1202 return self.expression( 1203 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1204 ) 1205 if self._match_text_seq("TO", "DISK"): 1206 return self.expression( 1207 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1208 ) 1209 if self._match_text_seq("TO", "VOLUME"): 1210 return self.expression( 1211 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1212 ) 1213 1214 return this 1215 1216 expressions = self._parse_csv(_parse_ttl_action) 1217 where = self._parse_where() 1218 group = self._parse_group() 1219 1220 aggregates = None 1221 if group and self._match(TokenType.SET): 1222 aggregates = 
self._parse_csv(self._parse_set_item) 1223 1224 return self.expression( 1225 exp.MergeTreeTTL, 1226 expressions=expressions, 1227 where=where, 1228 group=group, 1229 aggregates=aggregates, 1230 ) 1231 1232 def _parse_statement(self) -> t.Optional[exp.Expression]: 1233 if self._curr is None: 1234 return None 1235 1236 if self._match_set(self.STATEMENT_PARSERS): 1237 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1238 1239 if self._match_set(Tokenizer.COMMANDS): 1240 return self._parse_command() 1241 1242 expression = self._parse_expression() 1243 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1244 return self._parse_query_modifiers(expression) 1245 1246 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1247 start = self._prev 1248 temporary = self._match(TokenType.TEMPORARY) 1249 materialized = self._match_text_seq("MATERIALIZED") 1250 1251 kind = self._match_set(self.CREATABLES) and self._prev.text 1252 if not kind: 1253 return self._parse_as_command(start) 1254 1255 return self.expression( 1256 exp.Drop, 1257 comments=start.comments, 1258 exists=exists or self._parse_exists(), 1259 this=self._parse_table(schema=True), 1260 kind=kind, 1261 temporary=temporary, 1262 materialized=materialized, 1263 cascade=self._match_text_seq("CASCADE"), 1264 constraints=self._match_text_seq("CONSTRAINTS"), 1265 purge=self._match_text_seq("PURGE"), 1266 ) 1267 1268 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1269 return ( 1270 self._match_text_seq("IF") 1271 and (not not_ or self._match(TokenType.NOT)) 1272 and self._match(TokenType.EXISTS) 1273 ) 1274 1275 def _parse_create(self) -> exp.Create | exp.Command: 1276 # Note: this can't be None because we've matched a statement parser 1277 start = self._prev 1278 comments = self._prev_comments 1279 1280 replace = start.text.upper() == "REPLACE" or self._match_pair( 1281 TokenType.OR, TokenType.REPLACE 1282 ) 1283 unique = 
self._match(TokenType.UNIQUE) 1284 1285 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1286 self._advance() 1287 1288 properties = None 1289 create_token = self._match_set(self.CREATABLES) and self._prev 1290 1291 if not create_token: 1292 # exp.Properties.Location.POST_CREATE 1293 properties = self._parse_properties() 1294 create_token = self._match_set(self.CREATABLES) and self._prev 1295 1296 if not properties or not create_token: 1297 return self._parse_as_command(start) 1298 1299 exists = self._parse_exists(not_=True) 1300 this = None 1301 expression: t.Optional[exp.Expression] = None 1302 indexes = None 1303 no_schema_binding = None 1304 begin = None 1305 end = None 1306 clone = None 1307 1308 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1309 nonlocal properties 1310 if properties and temp_props: 1311 properties.expressions.extend(temp_props.expressions) 1312 elif temp_props: 1313 properties = temp_props 1314 1315 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1316 this = self._parse_user_defined_function(kind=create_token.token_type) 1317 1318 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1319 extend_props(self._parse_properties()) 1320 1321 self._match(TokenType.ALIAS) 1322 1323 if self._match(TokenType.COMMAND): 1324 expression = self._parse_as_command(self._prev) 1325 else: 1326 begin = self._match(TokenType.BEGIN) 1327 return_ = self._match_text_seq("RETURN") 1328 1329 if self._match(TokenType.STRING, advance=False): 1330 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1331 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1332 expression = self._parse_string() 1333 extend_props(self._parse_properties()) 1334 else: 1335 expression = self._parse_statement() 1336 1337 end = self._match_text_seq("END") 1338 1339 if return_: 1340 expression = 
self.expression(exp.Return, this=expression) 1341 elif create_token.token_type == TokenType.INDEX: 1342 this = self._parse_index(index=self._parse_id_var()) 1343 elif create_token.token_type in self.DB_CREATABLES: 1344 table_parts = self._parse_table_parts(schema=True) 1345 1346 # exp.Properties.Location.POST_NAME 1347 self._match(TokenType.COMMA) 1348 extend_props(self._parse_properties(before=True)) 1349 1350 this = self._parse_schema(this=table_parts) 1351 1352 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1353 extend_props(self._parse_properties()) 1354 1355 self._match(TokenType.ALIAS) 1356 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1357 # exp.Properties.Location.POST_ALIAS 1358 extend_props(self._parse_properties()) 1359 1360 expression = self._parse_ddl_select() 1361 1362 if create_token.token_type == TokenType.TABLE: 1363 # exp.Properties.Location.POST_EXPRESSION 1364 extend_props(self._parse_properties()) 1365 1366 indexes = [] 1367 while True: 1368 index = self._parse_index() 1369 1370 # exp.Properties.Location.POST_INDEX 1371 extend_props(self._parse_properties()) 1372 1373 if not index: 1374 break 1375 else: 1376 self._match(TokenType.COMMA) 1377 indexes.append(index) 1378 elif create_token.token_type == TokenType.VIEW: 1379 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1380 no_schema_binding = True 1381 1382 shallow = self._match_text_seq("SHALLOW") 1383 1384 if self._match_texts(self.CLONE_KEYWORDS): 1385 copy = self._prev.text.lower() == "copy" 1386 clone = self._parse_table(schema=True) 1387 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1388 clone_kind = ( 1389 self._match(TokenType.L_PAREN) 1390 and self._match_texts(self.CLONE_KINDS) 1391 and self._prev.text.upper() 1392 ) 1393 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1394 self._match(TokenType.R_PAREN) 1395 clone = self.expression( 1396 exp.Clone, 1397 this=clone, 1398 when=when, 1399 
kind=clone_kind, 1400 shallow=shallow, 1401 expression=clone_expression, 1402 copy=copy, 1403 ) 1404 1405 return self.expression( 1406 exp.Create, 1407 comments=comments, 1408 this=this, 1409 kind=create_token.text, 1410 replace=replace, 1411 unique=unique, 1412 expression=expression, 1413 exists=exists, 1414 properties=properties, 1415 indexes=indexes, 1416 no_schema_binding=no_schema_binding, 1417 begin=begin, 1418 end=end, 1419 clone=clone, 1420 ) 1421 1422 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1423 # only used for teradata currently 1424 self._match(TokenType.COMMA) 1425 1426 kwargs = { 1427 "no": self._match_text_seq("NO"), 1428 "dual": self._match_text_seq("DUAL"), 1429 "before": self._match_text_seq("BEFORE"), 1430 "default": self._match_text_seq("DEFAULT"), 1431 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1432 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1433 "after": self._match_text_seq("AFTER"), 1434 "minimum": self._match_texts(("MIN", "MINIMUM")), 1435 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1436 } 1437 1438 if self._match_texts(self.PROPERTY_PARSERS): 1439 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1440 try: 1441 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1442 except TypeError: 1443 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1444 1445 return None 1446 1447 def _parse_property(self) -> t.Optional[exp.Expression]: 1448 if self._match_texts(self.PROPERTY_PARSERS): 1449 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1450 1451 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1452 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1453 1454 if self._match_text_seq("COMPOUND", "SORTKEY"): 1455 return self._parse_sortkey(compound=True) 1456 1457 if self._match_text_seq("SQL", "SECURITY"): 1458 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 
1459 1460 index = self._index 1461 key = self._parse_column() 1462 1463 if not self._match(TokenType.EQ): 1464 self._retreat(index) 1465 return None 1466 1467 return self.expression( 1468 exp.Property, 1469 this=key.to_dot() if isinstance(key, exp.Column) else key, 1470 value=self._parse_column() or self._parse_var(any_token=True), 1471 ) 1472 1473 def _parse_stored(self) -> exp.FileFormatProperty: 1474 self._match(TokenType.ALIAS) 1475 1476 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1477 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1478 1479 return self.expression( 1480 exp.FileFormatProperty, 1481 this=self.expression( 1482 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1483 ) 1484 if input_format or output_format 1485 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1486 ) 1487 1488 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1489 self._match(TokenType.EQ) 1490 self._match(TokenType.ALIAS) 1491 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1492 1493 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1494 properties = [] 1495 while True: 1496 if before: 1497 prop = self._parse_property_before() 1498 else: 1499 prop = self._parse_property() 1500 1501 if not prop: 1502 break 1503 for p in ensure_list(prop): 1504 properties.append(p) 1505 1506 if properties: 1507 return self.expression(exp.Properties, expressions=properties) 1508 1509 return None 1510 1511 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1512 return self.expression( 1513 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1514 ) 1515 1516 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1517 if self._index >= 2: 1518 pre_volatile_token = self._tokens[self._index - 2] 1519 
else: 1520 pre_volatile_token = None 1521 1522 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1523 return exp.VolatileProperty() 1524 1525 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1526 1527 def _parse_with_property( 1528 self, 1529 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1530 if self._match(TokenType.L_PAREN, advance=False): 1531 return self._parse_wrapped_csv(self._parse_property) 1532 1533 if self._match_text_seq("JOURNAL"): 1534 return self._parse_withjournaltable() 1535 1536 if self._match_text_seq("DATA"): 1537 return self._parse_withdata(no=False) 1538 elif self._match_text_seq("NO", "DATA"): 1539 return self._parse_withdata(no=True) 1540 1541 if not self._next: 1542 return None 1543 1544 return self._parse_withisolatedloading() 1545 1546 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1547 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1548 self._match(TokenType.EQ) 1549 1550 user = self._parse_id_var() 1551 self._match(TokenType.PARAMETER) 1552 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1553 1554 if not user or not host: 1555 return None 1556 1557 return exp.DefinerProperty(this=f"{user}@{host}") 1558 1559 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1560 self._match(TokenType.TABLE) 1561 self._match(TokenType.EQ) 1562 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1563 1564 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1565 return self.expression(exp.LogProperty, no=no) 1566 1567 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1568 return self.expression(exp.JournalProperty, **kwargs) 1569 1570 def _parse_checksum(self) -> exp.ChecksumProperty: 1571 self._match(TokenType.EQ) 1572 1573 on = None 1574 if self._match(TokenType.ON): 1575 on = True 1576 elif self._match_text_seq("OFF"): 1577 on = False 1578 1579 return 
self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1580 1581 def _parse_cluster(self) -> exp.Cluster: 1582 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1583 1584 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1585 self._match_text_seq("BY") 1586 1587 self._match_l_paren() 1588 expressions = self._parse_csv(self._parse_column) 1589 self._match_r_paren() 1590 1591 if self._match_text_seq("SORTED", "BY"): 1592 self._match_l_paren() 1593 sorted_by = self._parse_csv(self._parse_ordered) 1594 self._match_r_paren() 1595 else: 1596 sorted_by = None 1597 1598 self._match(TokenType.INTO) 1599 buckets = self._parse_number() 1600 self._match_text_seq("BUCKETS") 1601 1602 return self.expression( 1603 exp.ClusteredByProperty, 1604 expressions=expressions, 1605 sorted_by=sorted_by, 1606 buckets=buckets, 1607 ) 1608 1609 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1610 if not self._match_text_seq("GRANTS"): 1611 self._retreat(self._index - 1) 1612 return None 1613 1614 return self.expression(exp.CopyGrantsProperty) 1615 1616 def _parse_freespace(self) -> exp.FreespaceProperty: 1617 self._match(TokenType.EQ) 1618 return self.expression( 1619 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1620 ) 1621 1622 def _parse_mergeblockratio( 1623 self, no: bool = False, default: bool = False 1624 ) -> exp.MergeBlockRatioProperty: 1625 if self._match(TokenType.EQ): 1626 return self.expression( 1627 exp.MergeBlockRatioProperty, 1628 this=self._parse_number(), 1629 percent=self._match(TokenType.PERCENT), 1630 ) 1631 1632 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1633 1634 def _parse_datablocksize( 1635 self, 1636 default: t.Optional[bool] = None, 1637 minimum: t.Optional[bool] = None, 1638 maximum: t.Optional[bool] = None, 1639 ) -> exp.DataBlocksizeProperty: 1640 self._match(TokenType.EQ) 1641 size = 
self._parse_number() 1642 1643 units = None 1644 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1645 units = self._prev.text 1646 1647 return self.expression( 1648 exp.DataBlocksizeProperty, 1649 size=size, 1650 units=units, 1651 default=default, 1652 minimum=minimum, 1653 maximum=maximum, 1654 ) 1655 1656 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1657 self._match(TokenType.EQ) 1658 always = self._match_text_seq("ALWAYS") 1659 manual = self._match_text_seq("MANUAL") 1660 never = self._match_text_seq("NEVER") 1661 default = self._match_text_seq("DEFAULT") 1662 1663 autotemp = None 1664 if self._match_text_seq("AUTOTEMP"): 1665 autotemp = self._parse_schema() 1666 1667 return self.expression( 1668 exp.BlockCompressionProperty, 1669 always=always, 1670 manual=manual, 1671 never=never, 1672 default=default, 1673 autotemp=autotemp, 1674 ) 1675 1676 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1677 no = self._match_text_seq("NO") 1678 concurrent = self._match_text_seq("CONCURRENT") 1679 self._match_text_seq("ISOLATED", "LOADING") 1680 for_all = self._match_text_seq("FOR", "ALL") 1681 for_insert = self._match_text_seq("FOR", "INSERT") 1682 for_none = self._match_text_seq("FOR", "NONE") 1683 return self.expression( 1684 exp.IsolatedLoadingProperty, 1685 no=no, 1686 concurrent=concurrent, 1687 for_all=for_all, 1688 for_insert=for_insert, 1689 for_none=for_none, 1690 ) 1691 1692 def _parse_locking(self) -> exp.LockingProperty: 1693 if self._match(TokenType.TABLE): 1694 kind = "TABLE" 1695 elif self._match(TokenType.VIEW): 1696 kind = "VIEW" 1697 elif self._match(TokenType.ROW): 1698 kind = "ROW" 1699 elif self._match_text_seq("DATABASE"): 1700 kind = "DATABASE" 1701 else: 1702 kind = None 1703 1704 if kind in ("DATABASE", "TABLE", "VIEW"): 1705 this = self._parse_table_parts() 1706 else: 1707 this = None 1708 1709 if self._match(TokenType.FOR): 1710 for_or_in = "FOR" 1711 elif self._match(TokenType.IN): 1712 for_or_in 
= "IN" 1713 else: 1714 for_or_in = None 1715 1716 if self._match_text_seq("ACCESS"): 1717 lock_type = "ACCESS" 1718 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1719 lock_type = "EXCLUSIVE" 1720 elif self._match_text_seq("SHARE"): 1721 lock_type = "SHARE" 1722 elif self._match_text_seq("READ"): 1723 lock_type = "READ" 1724 elif self._match_text_seq("WRITE"): 1725 lock_type = "WRITE" 1726 elif self._match_text_seq("CHECKSUM"): 1727 lock_type = "CHECKSUM" 1728 else: 1729 lock_type = None 1730 1731 override = self._match_text_seq("OVERRIDE") 1732 1733 return self.expression( 1734 exp.LockingProperty, 1735 this=this, 1736 kind=kind, 1737 for_or_in=for_or_in, 1738 lock_type=lock_type, 1739 override=override, 1740 ) 1741 1742 def _parse_partition_by(self) -> t.List[exp.Expression]: 1743 if self._match(TokenType.PARTITION_BY): 1744 return self._parse_csv(self._parse_conjunction) 1745 return [] 1746 1747 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1748 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1749 if self._match_text_seq("MINVALUE"): 1750 return exp.var("MINVALUE") 1751 if self._match_text_seq("MAXVALUE"): 1752 return exp.var("MAXVALUE") 1753 return self._parse_bitwise() 1754 1755 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1756 expression = None 1757 from_expressions = None 1758 to_expressions = None 1759 1760 if self._match(TokenType.IN): 1761 this = self._parse_wrapped_csv(self._parse_bitwise) 1762 elif self._match(TokenType.FROM): 1763 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1764 self._match_text_seq("TO") 1765 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1766 elif self._match_text_seq("WITH", "(", "MODULUS"): 1767 this = self._parse_number() 1768 self._match_text_seq(",", "REMAINDER") 1769 expression = self._parse_number() 1770 self._match_r_paren() 1771 else: 1772 self.raise_error("Failed to parse partition bound spec.") 1773 1774 return 
self.expression( 1775 exp.PartitionBoundSpec, 1776 this=this, 1777 expression=expression, 1778 from_expressions=from_expressions, 1779 to_expressions=to_expressions, 1780 ) 1781 1782 # https://www.postgresql.org/docs/current/sql-createtable.html 1783 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1784 if not self._match_text_seq("OF"): 1785 self._retreat(self._index - 1) 1786 return None 1787 1788 this = self._parse_table(schema=True) 1789 1790 if self._match(TokenType.DEFAULT): 1791 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1792 elif self._match_text_seq("FOR", "VALUES"): 1793 expression = self._parse_partition_bound_spec() 1794 else: 1795 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1796 1797 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1798 1799 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1800 self._match(TokenType.EQ) 1801 return self.expression( 1802 exp.PartitionedByProperty, 1803 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1804 ) 1805 1806 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1807 if self._match_text_seq("AND", "STATISTICS"): 1808 statistics = True 1809 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1810 statistics = False 1811 else: 1812 statistics = None 1813 1814 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1815 1816 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1817 if self._match_text_seq("PRIMARY", "INDEX"): 1818 return exp.NoPrimaryIndexProperty() 1819 return None 1820 1821 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1822 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1823 return exp.OnCommitProperty() 1824 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1825 return exp.OnCommitProperty(delete=True) 1826 return self.expression(exp.OnProperty, 
this=self._parse_schema(self._parse_id_var())) 1827 1828 def _parse_distkey(self) -> exp.DistKeyProperty: 1829 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1830 1831 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1832 table = self._parse_table(schema=True) 1833 1834 options = [] 1835 while self._match_texts(("INCLUDING", "EXCLUDING")): 1836 this = self._prev.text.upper() 1837 1838 id_var = self._parse_id_var() 1839 if not id_var: 1840 return None 1841 1842 options.append( 1843 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1844 ) 1845 1846 return self.expression(exp.LikeProperty, this=table, expressions=options) 1847 1848 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1849 return self.expression( 1850 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1851 ) 1852 1853 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1854 self._match(TokenType.EQ) 1855 return self.expression( 1856 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1857 ) 1858 1859 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1860 self._match_text_seq("WITH", "CONNECTION") 1861 return self.expression( 1862 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1863 ) 1864 1865 def _parse_returns(self) -> exp.ReturnsProperty: 1866 value: t.Optional[exp.Expression] 1867 is_table = self._match(TokenType.TABLE) 1868 1869 if is_table: 1870 if self._match(TokenType.LT): 1871 value = self.expression( 1872 exp.Schema, 1873 this="TABLE", 1874 expressions=self._parse_csv(self._parse_struct_types), 1875 ) 1876 if not self._match(TokenType.GT): 1877 self.raise_error("Expecting >") 1878 else: 1879 value = self._parse_schema(exp.var("TABLE")) 1880 else: 1881 value = self._parse_types() 1882 1883 return self.expression(exp.ReturnsProperty, this=value, 
is_table=is_table) 1884 1885 def _parse_describe(self) -> exp.Describe: 1886 kind = self._match_set(self.CREATABLES) and self._prev.text 1887 this = self._parse_table(schema=True) 1888 properties = self._parse_properties() 1889 expressions = properties.expressions if properties else None 1890 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1891 1892 def _parse_insert(self) -> exp.Insert: 1893 comments = ensure_list(self._prev_comments) 1894 overwrite = self._match(TokenType.OVERWRITE) 1895 ignore = self._match(TokenType.IGNORE) 1896 local = self._match_text_seq("LOCAL") 1897 alternative = None 1898 1899 if self._match_text_seq("DIRECTORY"): 1900 this: t.Optional[exp.Expression] = self.expression( 1901 exp.Directory, 1902 this=self._parse_var_or_string(), 1903 local=local, 1904 row_format=self._parse_row_format(match_row=True), 1905 ) 1906 else: 1907 if self._match(TokenType.OR): 1908 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1909 1910 self._match(TokenType.INTO) 1911 comments += ensure_list(self._prev_comments) 1912 self._match(TokenType.TABLE) 1913 this = self._parse_table(schema=True) 1914 1915 returning = self._parse_returning() 1916 1917 return self.expression( 1918 exp.Insert, 1919 comments=comments, 1920 this=this, 1921 by_name=self._match_text_seq("BY", "NAME"), 1922 exists=self._parse_exists(), 1923 partition=self._parse_partition(), 1924 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1925 and self._parse_conjunction(), 1926 expression=self._parse_ddl_select(), 1927 conflict=self._parse_on_conflict(), 1928 returning=returning or self._parse_returning(), 1929 overwrite=overwrite, 1930 alternative=alternative, 1931 ignore=ignore, 1932 ) 1933 1934 def _parse_kill(self) -> exp.Kill: 1935 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1936 1937 return self.expression( 1938 exp.Kill, 1939 this=self._parse_primary(), 1940 kind=kind, 1941 ) 
1942 1943 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1944 conflict = self._match_text_seq("ON", "CONFLICT") 1945 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1946 1947 if not conflict and not duplicate: 1948 return None 1949 1950 nothing = None 1951 expressions = None 1952 key = None 1953 constraint = None 1954 1955 if conflict: 1956 if self._match_text_seq("ON", "CONSTRAINT"): 1957 constraint = self._parse_id_var() 1958 else: 1959 key = self._parse_csv(self._parse_value) 1960 1961 self._match_text_seq("DO") 1962 if self._match_text_seq("NOTHING"): 1963 nothing = True 1964 else: 1965 self._match(TokenType.UPDATE) 1966 self._match(TokenType.SET) 1967 expressions = self._parse_csv(self._parse_equality) 1968 1969 return self.expression( 1970 exp.OnConflict, 1971 duplicate=duplicate, 1972 expressions=expressions, 1973 nothing=nothing, 1974 key=key, 1975 constraint=constraint, 1976 ) 1977 1978 def _parse_returning(self) -> t.Optional[exp.Returning]: 1979 if not self._match(TokenType.RETURNING): 1980 return None 1981 return self.expression( 1982 exp.Returning, 1983 expressions=self._parse_csv(self._parse_expression), 1984 into=self._match(TokenType.INTO) and self._parse_table_part(), 1985 ) 1986 1987 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1988 if not self._match(TokenType.FORMAT): 1989 return None 1990 return self._parse_row_format() 1991 1992 def _parse_row_format( 1993 self, match_row: bool = False 1994 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1995 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1996 return None 1997 1998 if self._match_text_seq("SERDE"): 1999 this = self._parse_string() 2000 2001 serde_properties = None 2002 if self._match(TokenType.SERDE_PROPERTIES): 2003 serde_properties = self.expression( 2004 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2005 ) 2006 2007 return 
self.expression( 2008 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2009 ) 2010 2011 self._match_text_seq("DELIMITED") 2012 2013 kwargs = {} 2014 2015 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2016 kwargs["fields"] = self._parse_string() 2017 if self._match_text_seq("ESCAPED", "BY"): 2018 kwargs["escaped"] = self._parse_string() 2019 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2020 kwargs["collection_items"] = self._parse_string() 2021 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2022 kwargs["map_keys"] = self._parse_string() 2023 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2024 kwargs["lines"] = self._parse_string() 2025 if self._match_text_seq("NULL", "DEFINED", "AS"): 2026 kwargs["null"] = self._parse_string() 2027 2028 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2029 2030 def _parse_load(self) -> exp.LoadData | exp.Command: 2031 if self._match_text_seq("DATA"): 2032 local = self._match_text_seq("LOCAL") 2033 self._match_text_seq("INPATH") 2034 inpath = self._parse_string() 2035 overwrite = self._match(TokenType.OVERWRITE) 2036 self._match_pair(TokenType.INTO, TokenType.TABLE) 2037 2038 return self.expression( 2039 exp.LoadData, 2040 this=self._parse_table(schema=True), 2041 local=local, 2042 overwrite=overwrite, 2043 inpath=inpath, 2044 partition=self._parse_partition(), 2045 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2046 serde=self._match_text_seq("SERDE") and self._parse_string(), 2047 ) 2048 return self._parse_as_command(self._prev) 2049 2050 def _parse_delete(self) -> exp.Delete: 2051 # This handles MySQL's "Multiple-Table Syntax" 2052 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2053 tables = None 2054 comments = self._prev_comments 2055 if not self._match(TokenType.FROM, advance=False): 2056 tables = self._parse_csv(self._parse_table) or None 2057 2058 returning = self._parse_returning() 
2059 2060 return self.expression( 2061 exp.Delete, 2062 comments=comments, 2063 tables=tables, 2064 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2065 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2066 where=self._parse_where(), 2067 returning=returning or self._parse_returning(), 2068 limit=self._parse_limit(), 2069 ) 2070 2071 def _parse_update(self) -> exp.Update: 2072 comments = self._prev_comments 2073 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2074 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2075 returning = self._parse_returning() 2076 return self.expression( 2077 exp.Update, 2078 comments=comments, 2079 **{ # type: ignore 2080 "this": this, 2081 "expressions": expressions, 2082 "from": self._parse_from(joins=True), 2083 "where": self._parse_where(), 2084 "returning": returning or self._parse_returning(), 2085 "order": self._parse_order(), 2086 "limit": self._parse_limit(), 2087 }, 2088 ) 2089 2090 def _parse_uncache(self) -> exp.Uncache: 2091 if not self._match(TokenType.TABLE): 2092 self.raise_error("Expecting TABLE after UNCACHE") 2093 2094 return self.expression( 2095 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2096 ) 2097 2098 def _parse_cache(self) -> exp.Cache: 2099 lazy = self._match_text_seq("LAZY") 2100 self._match(TokenType.TABLE) 2101 table = self._parse_table(schema=True) 2102 2103 options = [] 2104 if self._match_text_seq("OPTIONS"): 2105 self._match_l_paren() 2106 k = self._parse_string() 2107 self._match(TokenType.EQ) 2108 v = self._parse_string() 2109 options = [k, v] 2110 self._match_r_paren() 2111 2112 self._match(TokenType.ALIAS) 2113 return self.expression( 2114 exp.Cache, 2115 this=table, 2116 lazy=lazy, 2117 options=options, 2118 expression=self._parse_select(nested=True), 2119 ) 2120 2121 def _parse_partition(self) -> t.Optional[exp.Partition]: 2122 if not 
self._match(TokenType.PARTITION): 2123 return None 2124 2125 return self.expression( 2126 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2127 ) 2128 2129 def _parse_value(self) -> exp.Tuple: 2130 if self._match(TokenType.L_PAREN): 2131 expressions = self._parse_csv(self._parse_conjunction) 2132 self._match_r_paren() 2133 return self.expression(exp.Tuple, expressions=expressions) 2134 2135 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2136 # https://prestodb.io/docs/current/sql/values.html 2137 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2138 2139 def _parse_projections(self) -> t.List[exp.Expression]: 2140 return self._parse_expressions() 2141 2142 def _parse_select( 2143 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2144 ) -> t.Optional[exp.Expression]: 2145 cte = self._parse_with() 2146 2147 if cte: 2148 this = self._parse_statement() 2149 2150 if not this: 2151 self.raise_error("Failed to parse any statement following CTE") 2152 return cte 2153 2154 if "with" in this.arg_types: 2155 this.set("with", cte) 2156 else: 2157 self.raise_error(f"{this.key} does not support CTE") 2158 this = cte 2159 2160 return this 2161 2162 # duckdb supports leading with FROM x 2163 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2164 2165 if self._match(TokenType.SELECT): 2166 comments = self._prev_comments 2167 2168 hint = self._parse_hint() 2169 all_ = self._match(TokenType.ALL) 2170 distinct = self._match_set(self.DISTINCT_TOKENS) 2171 2172 kind = ( 2173 self._match(TokenType.ALIAS) 2174 and self._match_texts(("STRUCT", "VALUE")) 2175 and self._prev.text 2176 ) 2177 2178 if distinct: 2179 distinct = self.expression( 2180 exp.Distinct, 2181 on=self._parse_value() if self._match(TokenType.ON) else None, 2182 ) 2183 2184 if all_ and distinct: 2185 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2186 2187 
limit = self._parse_limit(top=True) 2188 projections = self._parse_projections() 2189 2190 this = self.expression( 2191 exp.Select, 2192 kind=kind, 2193 hint=hint, 2194 distinct=distinct, 2195 expressions=projections, 2196 limit=limit, 2197 ) 2198 this.comments = comments 2199 2200 into = self._parse_into() 2201 if into: 2202 this.set("into", into) 2203 2204 if not from_: 2205 from_ = self._parse_from() 2206 2207 if from_: 2208 this.set("from", from_) 2209 2210 this = self._parse_query_modifiers(this) 2211 elif (table or nested) and self._match(TokenType.L_PAREN): 2212 if self._match(TokenType.PIVOT): 2213 this = self._parse_simplified_pivot() 2214 elif self._match(TokenType.FROM): 2215 this = exp.select("*").from_( 2216 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2217 ) 2218 else: 2219 this = self._parse_table() if table else self._parse_select(nested=True) 2220 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2221 2222 self._match_r_paren() 2223 2224 # We return early here so that the UNION isn't attached to the subquery by the 2225 # following call to _parse_set_operations, but instead becomes the parent node 2226 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2227 elif self._match(TokenType.VALUES): 2228 this = self.expression( 2229 exp.Values, 2230 expressions=self._parse_csv(self._parse_value), 2231 alias=self._parse_table_alias(), 2232 ) 2233 elif from_: 2234 this = exp.select("*").from_(from_.this, copy=False) 2235 else: 2236 this = None 2237 2238 return self._parse_set_operations(this) 2239 2240 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2241 if not skip_with_token and not self._match(TokenType.WITH): 2242 return None 2243 2244 comments = self._prev_comments 2245 recursive = self._match(TokenType.RECURSIVE) 2246 2247 expressions = [] 2248 while True: 2249 expressions.append(self._parse_cte()) 2250 2251 if not self._match(TokenType.COMMA) and not 
self._match(TokenType.WITH): 2252 break 2253 else: 2254 self._match(TokenType.WITH) 2255 2256 return self.expression( 2257 exp.With, comments=comments, expressions=expressions, recursive=recursive 2258 ) 2259 2260 def _parse_cte(self) -> exp.CTE: 2261 alias = self._parse_table_alias() 2262 if not alias or not alias.this: 2263 self.raise_error("Expected CTE to have alias") 2264 2265 self._match(TokenType.ALIAS) 2266 return self.expression( 2267 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2268 ) 2269 2270 def _parse_table_alias( 2271 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2272 ) -> t.Optional[exp.TableAlias]: 2273 any_token = self._match(TokenType.ALIAS) 2274 alias = ( 2275 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2276 or self._parse_string_as_identifier() 2277 ) 2278 2279 index = self._index 2280 if self._match(TokenType.L_PAREN): 2281 columns = self._parse_csv(self._parse_function_parameter) 2282 self._match_r_paren() if columns else self._retreat(index) 2283 else: 2284 columns = None 2285 2286 if not alias and not columns: 2287 return None 2288 2289 return self.expression(exp.TableAlias, this=alias, columns=columns) 2290 2291 def _parse_subquery( 2292 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2293 ) -> t.Optional[exp.Subquery]: 2294 if not this: 2295 return None 2296 2297 return self.expression( 2298 exp.Subquery, 2299 this=this, 2300 pivots=self._parse_pivots(), 2301 alias=self._parse_table_alias() if parse_alias else None, 2302 ) 2303 2304 def _parse_query_modifiers( 2305 self, this: t.Optional[exp.Expression] 2306 ) -> t.Optional[exp.Expression]: 2307 if isinstance(this, self.MODIFIABLES): 2308 for join in iter(self._parse_join, None): 2309 this.append("joins", join) 2310 for lateral in iter(self._parse_lateral, None): 2311 this.append("laterals", lateral) 2312 2313 while True: 2314 if self._match_set(self.QUERY_MODIFIER_PARSERS, 
advance=False): 2315 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2316 key, expression = parser(self) 2317 2318 if expression: 2319 this.set(key, expression) 2320 if key == "limit": 2321 offset = expression.args.pop("offset", None) 2322 if offset: 2323 this.set("offset", exp.Offset(expression=offset)) 2324 continue 2325 break 2326 return this 2327 2328 def _parse_hint(self) -> t.Optional[exp.Hint]: 2329 if self._match(TokenType.HINT): 2330 hints = [] 2331 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2332 hints.extend(hint) 2333 2334 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2335 self.raise_error("Expected */ after HINT") 2336 2337 return self.expression(exp.Hint, expressions=hints) 2338 2339 return None 2340 2341 def _parse_into(self) -> t.Optional[exp.Into]: 2342 if not self._match(TokenType.INTO): 2343 return None 2344 2345 temp = self._match(TokenType.TEMPORARY) 2346 unlogged = self._match_text_seq("UNLOGGED") 2347 self._match(TokenType.TABLE) 2348 2349 return self.expression( 2350 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2351 ) 2352 2353 def _parse_from( 2354 self, joins: bool = False, skip_from_token: bool = False 2355 ) -> t.Optional[exp.From]: 2356 if not skip_from_token and not self._match(TokenType.FROM): 2357 return None 2358 2359 return self.expression( 2360 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2361 ) 2362 2363 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2364 if not self._match(TokenType.MATCH_RECOGNIZE): 2365 return None 2366 2367 self._match_l_paren() 2368 2369 partition = self._parse_partition_by() 2370 order = self._parse_order() 2371 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2372 2373 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2374 rows = exp.var("ONE ROW PER MATCH") 2375 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2376 text = 
"ALL ROWS PER MATCH" 2377 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2378 text += f" SHOW EMPTY MATCHES" 2379 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2380 text += f" OMIT EMPTY MATCHES" 2381 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2382 text += f" WITH UNMATCHED ROWS" 2383 rows = exp.var(text) 2384 else: 2385 rows = None 2386 2387 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2388 text = "AFTER MATCH SKIP" 2389 if self._match_text_seq("PAST", "LAST", "ROW"): 2390 text += f" PAST LAST ROW" 2391 elif self._match_text_seq("TO", "NEXT", "ROW"): 2392 text += f" TO NEXT ROW" 2393 elif self._match_text_seq("TO", "FIRST"): 2394 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2395 elif self._match_text_seq("TO", "LAST"): 2396 text += f" TO LAST {self._advance_any().text}" # type: ignore 2397 after = exp.var(text) 2398 else: 2399 after = None 2400 2401 if self._match_text_seq("PATTERN"): 2402 self._match_l_paren() 2403 2404 if not self._curr: 2405 self.raise_error("Expecting )", self._curr) 2406 2407 paren = 1 2408 start = self._curr 2409 2410 while self._curr and paren > 0: 2411 if self._curr.token_type == TokenType.L_PAREN: 2412 paren += 1 2413 if self._curr.token_type == TokenType.R_PAREN: 2414 paren -= 1 2415 2416 end = self._prev 2417 self._advance() 2418 2419 if paren > 0: 2420 self.raise_error("Expecting )", self._curr) 2421 2422 pattern = exp.var(self._find_sql(start, end)) 2423 else: 2424 pattern = None 2425 2426 define = ( 2427 self._parse_csv( 2428 lambda: self.expression( 2429 exp.Alias, 2430 alias=self._parse_id_var(any_token=True), 2431 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2432 ) 2433 ) 2434 if self._match_text_seq("DEFINE") 2435 else None 2436 ) 2437 2438 self._match_r_paren() 2439 2440 return self.expression( 2441 exp.MatchRecognize, 2442 partition_by=partition, 2443 order=order, 2444 measures=measures, 2445 rows=rows, 2446 after=after, 2447 pattern=pattern, 2448 
define=define, 2449 alias=self._parse_table_alias(), 2450 ) 2451 2452 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2453 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2454 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2455 2456 if outer_apply or cross_apply: 2457 this = self._parse_select(table=True) 2458 view = None 2459 outer = not cross_apply 2460 elif self._match(TokenType.LATERAL): 2461 this = self._parse_select(table=True) 2462 view = self._match(TokenType.VIEW) 2463 outer = self._match(TokenType.OUTER) 2464 else: 2465 return None 2466 2467 if not this: 2468 this = ( 2469 self._parse_unnest() 2470 or self._parse_function() 2471 or self._parse_id_var(any_token=False) 2472 ) 2473 2474 while self._match(TokenType.DOT): 2475 this = exp.Dot( 2476 this=this, 2477 expression=self._parse_function() or self._parse_id_var(any_token=False), 2478 ) 2479 2480 if view: 2481 table = self._parse_id_var(any_token=False) 2482 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2483 table_alias: t.Optional[exp.TableAlias] = self.expression( 2484 exp.TableAlias, this=table, columns=columns 2485 ) 2486 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2487 # We move the alias from the lateral's child node to the lateral itself 2488 table_alias = this.args["alias"].pop() 2489 else: 2490 table_alias = self._parse_table_alias() 2491 2492 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2493 2494 def _parse_join_parts( 2495 self, 2496 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2497 return ( 2498 self._match_set(self.JOIN_METHODS) and self._prev, 2499 self._match_set(self.JOIN_SIDES) and self._prev, 2500 self._match_set(self.JOIN_KINDS) and self._prev, 2501 ) 2502 2503 def _parse_join( 2504 self, skip_join_token: bool = False, parse_bracket: bool = False 2505 ) -> t.Optional[exp.Join]: 2506 if self._match(TokenType.COMMA): 
2507 return self.expression(exp.Join, this=self._parse_table()) 2508 2509 index = self._index 2510 method, side, kind = self._parse_join_parts() 2511 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2512 join = self._match(TokenType.JOIN) 2513 2514 if not skip_join_token and not join: 2515 self._retreat(index) 2516 kind = None 2517 method = None 2518 side = None 2519 2520 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2521 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2522 2523 if not skip_join_token and not join and not outer_apply and not cross_apply: 2524 return None 2525 2526 if outer_apply: 2527 side = Token(TokenType.LEFT, "LEFT") 2528 2529 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2530 2531 if method: 2532 kwargs["method"] = method.text 2533 if side: 2534 kwargs["side"] = side.text 2535 if kind: 2536 kwargs["kind"] = kind.text 2537 if hint: 2538 kwargs["hint"] = hint 2539 2540 if self._match(TokenType.ON): 2541 kwargs["on"] = self._parse_conjunction() 2542 elif self._match(TokenType.USING): 2543 kwargs["using"] = self._parse_wrapped_id_vars() 2544 elif not (kind and kind.token_type == TokenType.CROSS): 2545 index = self._index 2546 join = self._parse_join() 2547 2548 if join and self._match(TokenType.ON): 2549 kwargs["on"] = self._parse_conjunction() 2550 elif join and self._match(TokenType.USING): 2551 kwargs["using"] = self._parse_wrapped_id_vars() 2552 else: 2553 join = None 2554 self._retreat(index) 2555 2556 kwargs["this"].set("joins", [join] if join else None) 2557 2558 comments = [c for token in (method, side, kind) if token for c in token.comments] 2559 return self.expression(exp.Join, comments=comments, **kwargs) 2560 2561 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2562 this = self._parse_conjunction() 2563 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2564 return this 2565 2566 opclass = 
self._parse_var(any_token=True) 2567 if opclass: 2568 return self.expression(exp.Opclass, this=this, expression=opclass) 2569 2570 return this 2571 2572 def _parse_index( 2573 self, 2574 index: t.Optional[exp.Expression] = None, 2575 ) -> t.Optional[exp.Index]: 2576 if index: 2577 unique = None 2578 primary = None 2579 amp = None 2580 2581 self._match(TokenType.ON) 2582 self._match(TokenType.TABLE) # hive 2583 table = self._parse_table_parts(schema=True) 2584 else: 2585 unique = self._match(TokenType.UNIQUE) 2586 primary = self._match_text_seq("PRIMARY") 2587 amp = self._match_text_seq("AMP") 2588 2589 if not self._match(TokenType.INDEX): 2590 return None 2591 2592 index = self._parse_id_var() 2593 table = None 2594 2595 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2596 2597 if self._match(TokenType.L_PAREN, advance=False): 2598 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2599 else: 2600 columns = None 2601 2602 return self.expression( 2603 exp.Index, 2604 this=index, 2605 table=table, 2606 using=using, 2607 columns=columns, 2608 unique=unique, 2609 primary=primary, 2610 amp=amp, 2611 partition_by=self._parse_partition_by(), 2612 where=self._parse_where(), 2613 ) 2614 2615 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2616 hints: t.List[exp.Expression] = [] 2617 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2618 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2619 hints.append( 2620 self.expression( 2621 exp.WithTableHint, 2622 expressions=self._parse_csv( 2623 lambda: self._parse_function() or self._parse_var(any_token=True) 2624 ), 2625 ) 2626 ) 2627 self._match_r_paren() 2628 else: 2629 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2630 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2631 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2632 2633 self._match_texts({"INDEX", 
"KEY"}) 2634 if self._match(TokenType.FOR): 2635 hint.set("target", self._advance_any() and self._prev.text.upper()) 2636 2637 hint.set("expressions", self._parse_wrapped_id_vars()) 2638 hints.append(hint) 2639 2640 return hints or None 2641 2642 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2643 return ( 2644 (not schema and self._parse_function(optional_parens=False)) 2645 or self._parse_id_var(any_token=False) 2646 or self._parse_string_as_identifier() 2647 or self._parse_placeholder() 2648 ) 2649 2650 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2651 catalog = None 2652 db = None 2653 table = self._parse_table_part(schema=schema) 2654 2655 while self._match(TokenType.DOT): 2656 if catalog: 2657 # This allows nesting the table in arbitrarily many dot expressions if needed 2658 table = self.expression( 2659 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2660 ) 2661 else: 2662 catalog = db 2663 db = table 2664 table = self._parse_table_part(schema=schema) 2665 2666 if not table: 2667 self.raise_error(f"Expected table name but got {self._curr}") 2668 2669 return self.expression( 2670 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2671 ) 2672 2673 def _parse_table( 2674 self, 2675 schema: bool = False, 2676 joins: bool = False, 2677 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2678 parse_bracket: bool = False, 2679 ) -> t.Optional[exp.Expression]: 2680 lateral = self._parse_lateral() 2681 if lateral: 2682 return lateral 2683 2684 unnest = self._parse_unnest() 2685 if unnest: 2686 return unnest 2687 2688 values = self._parse_derived_table_values() 2689 if values: 2690 return values 2691 2692 subquery = self._parse_select(table=True) 2693 if subquery: 2694 if not subquery.args.get("pivots"): 2695 subquery.set("pivots", self._parse_pivots()) 2696 return subquery 2697 2698 bracket = parse_bracket and self._parse_bracket(None) 2699 bracket = 
self.expression(exp.Table, this=bracket) if bracket else None 2700 this = t.cast( 2701 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2702 ) 2703 2704 if schema: 2705 return self._parse_schema(this=this) 2706 2707 version = self._parse_version() 2708 2709 if version: 2710 this.set("version", version) 2711 2712 if self.ALIAS_POST_TABLESAMPLE: 2713 table_sample = self._parse_table_sample() 2714 2715 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2716 if alias: 2717 this.set("alias", alias) 2718 2719 if self._match_text_seq("AT"): 2720 this.set("index", self._parse_id_var()) 2721 2722 this.set("hints", self._parse_table_hints()) 2723 2724 if not this.args.get("pivots"): 2725 this.set("pivots", self._parse_pivots()) 2726 2727 if not self.ALIAS_POST_TABLESAMPLE: 2728 table_sample = self._parse_table_sample() 2729 2730 if table_sample: 2731 table_sample.set("this", this) 2732 this = table_sample 2733 2734 if joins: 2735 for join in iter(self._parse_join, None): 2736 this.append("joins", join) 2737 2738 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2739 this.set("ordinality", True) 2740 this.set("alias", self._parse_table_alias()) 2741 2742 return this 2743 2744 def _parse_version(self) -> t.Optional[exp.Version]: 2745 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2746 this = "TIMESTAMP" 2747 elif self._match(TokenType.VERSION_SNAPSHOT): 2748 this = "VERSION" 2749 else: 2750 return None 2751 2752 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2753 kind = self._prev.text.upper() 2754 start = self._parse_bitwise() 2755 self._match_texts(("TO", "AND")) 2756 end = self._parse_bitwise() 2757 expression: t.Optional[exp.Expression] = self.expression( 2758 exp.Tuple, expressions=[start, end] 2759 ) 2760 elif self._match_text_seq("CONTAINED", "IN"): 2761 kind = "CONTAINED IN" 2762 expression = self.expression( 2763 exp.Tuple, 
expressions=self._parse_wrapped_csv(self._parse_bitwise) 2764 ) 2765 elif self._match(TokenType.ALL): 2766 kind = "ALL" 2767 expression = None 2768 else: 2769 self._match_text_seq("AS", "OF") 2770 kind = "AS OF" 2771 expression = self._parse_type() 2772 2773 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2774 2775 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2776 if not self._match(TokenType.UNNEST): 2777 return None 2778 2779 expressions = self._parse_wrapped_csv(self._parse_type) 2780 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2781 2782 alias = self._parse_table_alias() if with_alias else None 2783 2784 if alias: 2785 if self.UNNEST_COLUMN_ONLY: 2786 if alias.args.get("columns"): 2787 self.raise_error("Unexpected extra column alias in unnest.") 2788 2789 alias.set("columns", [alias.this]) 2790 alias.set("this", None) 2791 2792 columns = alias.args.get("columns") or [] 2793 if offset and len(expressions) < len(columns): 2794 offset = columns.pop() 2795 2796 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2797 self._match(TokenType.ALIAS) 2798 offset = self._parse_id_var( 2799 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2800 ) or exp.to_identifier("offset") 2801 2802 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2803 2804 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2805 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2806 if not is_derived and not self._match(TokenType.VALUES): 2807 return None 2808 2809 expressions = self._parse_csv(self._parse_value) 2810 alias = self._parse_table_alias() 2811 2812 if is_derived: 2813 self._match_r_paren() 2814 2815 return self.expression( 2816 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2817 ) 2818 2819 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2820 
if not self._match(TokenType.TABLE_SAMPLE) and not ( 2821 as_modifier and self._match_text_seq("USING", "SAMPLE") 2822 ): 2823 return None 2824 2825 bucket_numerator = None 2826 bucket_denominator = None 2827 bucket_field = None 2828 percent = None 2829 rows = None 2830 size = None 2831 seed = None 2832 2833 kind = ( 2834 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2835 ) 2836 method = self._parse_var(tokens=(TokenType.ROW,)) 2837 2838 matched_l_paren = self._match(TokenType.L_PAREN) 2839 2840 if self.TABLESAMPLE_CSV: 2841 num = None 2842 expressions = self._parse_csv(self._parse_primary) 2843 else: 2844 expressions = None 2845 num = ( 2846 self._parse_factor() 2847 if self._match(TokenType.NUMBER, advance=False) 2848 else self._parse_primary() 2849 ) 2850 2851 if self._match_text_seq("BUCKET"): 2852 bucket_numerator = self._parse_number() 2853 self._match_text_seq("OUT", "OF") 2854 bucket_denominator = bucket_denominator = self._parse_number() 2855 self._match(TokenType.ON) 2856 bucket_field = self._parse_field() 2857 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2858 percent = num 2859 elif self._match(TokenType.ROWS): 2860 rows = num 2861 elif num: 2862 size = num 2863 2864 if matched_l_paren: 2865 self._match_r_paren() 2866 2867 if self._match(TokenType.L_PAREN): 2868 method = self._parse_var() 2869 seed = self._match(TokenType.COMMA) and self._parse_number() 2870 self._match_r_paren() 2871 elif self._match_texts(("SEED", "REPEATABLE")): 2872 seed = self._parse_wrapped(self._parse_number) 2873 2874 return self.expression( 2875 exp.TableSample, 2876 expressions=expressions, 2877 method=method, 2878 bucket_numerator=bucket_numerator, 2879 bucket_denominator=bucket_denominator, 2880 bucket_field=bucket_field, 2881 percent=percent, 2882 rows=rows, 2883 size=size, 2884 seed=seed, 2885 kind=kind, 2886 ) 2887 2888 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2889 return list(iter(self._parse_pivot, 
None)) or None 2890 2891 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2892 return list(iter(self._parse_join, None)) or None 2893 2894 # https://duckdb.org/docs/sql/statements/pivot 2895 def _parse_simplified_pivot(self) -> exp.Pivot: 2896 def _parse_on() -> t.Optional[exp.Expression]: 2897 this = self._parse_bitwise() 2898 return self._parse_in(this) if self._match(TokenType.IN) else this 2899 2900 this = self._parse_table() 2901 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2902 using = self._match(TokenType.USING) and self._parse_csv( 2903 lambda: self._parse_alias(self._parse_function()) 2904 ) 2905 group = self._parse_group() 2906 return self.expression( 2907 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2908 ) 2909 2910 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2911 index = self._index 2912 include_nulls = None 2913 2914 if self._match(TokenType.PIVOT): 2915 unpivot = False 2916 elif self._match(TokenType.UNPIVOT): 2917 unpivot = True 2918 2919 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2920 if self._match_text_seq("INCLUDE", "NULLS"): 2921 include_nulls = True 2922 elif self._match_text_seq("EXCLUDE", "NULLS"): 2923 include_nulls = False 2924 else: 2925 return None 2926 2927 expressions = [] 2928 field = None 2929 2930 if not self._match(TokenType.L_PAREN): 2931 self._retreat(index) 2932 return None 2933 2934 if unpivot: 2935 expressions = self._parse_csv(self._parse_column) 2936 else: 2937 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2938 2939 if not expressions: 2940 self.raise_error("Failed to parse PIVOT's aggregation list") 2941 2942 if not self._match(TokenType.FOR): 2943 self.raise_error("Expecting FOR") 2944 2945 value = self._parse_column() 2946 2947 if not self._match(TokenType.IN): 2948 self.raise_error("Expecting IN") 2949 2950 field = self._parse_in(value, alias=True) 2951 2952 
self._match_r_paren() 2953 2954 pivot = self.expression( 2955 exp.Pivot, 2956 expressions=expressions, 2957 field=field, 2958 unpivot=unpivot, 2959 include_nulls=include_nulls, 2960 ) 2961 2962 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2963 pivot.set("alias", self._parse_table_alias()) 2964 2965 if not unpivot: 2966 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2967 2968 columns: t.List[exp.Expression] = [] 2969 for fld in pivot.args["field"].expressions: 2970 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2971 for name in names: 2972 if self.PREFIXED_PIVOT_COLUMNS: 2973 name = f"{name}_{field_name}" if name else field_name 2974 else: 2975 name = f"{field_name}_{name}" if name else field_name 2976 2977 columns.append(exp.to_identifier(name)) 2978 2979 pivot.set("columns", columns) 2980 2981 return pivot 2982 2983 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2984 return [agg.alias for agg in aggregations] 2985 2986 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2987 if not skip_where_token and not self._match(TokenType.WHERE): 2988 return None 2989 2990 return self.expression( 2991 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2992 ) 2993 2994 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2995 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2996 return None 2997 2998 elements = defaultdict(list) 2999 3000 if self._match(TokenType.ALL): 3001 return self.expression(exp.Group, all=True) 3002 3003 while True: 3004 expressions = self._parse_csv(self._parse_conjunction) 3005 if expressions: 3006 elements["expressions"].extend(expressions) 3007 3008 grouping_sets = self._parse_grouping_sets() 3009 if grouping_sets: 3010 elements["grouping_sets"].extend(grouping_sets) 3011 3012 rollup = None 3013 cube = None 3014 totals = 
None 3015 3016 index = self._index 3017 with_ = self._match(TokenType.WITH) 3018 if self._match(TokenType.ROLLUP): 3019 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3020 elements["rollup"].extend(ensure_list(rollup)) 3021 3022 if self._match(TokenType.CUBE): 3023 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3024 elements["cube"].extend(ensure_list(cube)) 3025 3026 if self._match_text_seq("TOTALS"): 3027 totals = True 3028 elements["totals"] = True # type: ignore 3029 3030 if not (grouping_sets or rollup or cube or totals): 3031 if with_: 3032 self._retreat(index) 3033 break 3034 3035 return self.expression(exp.Group, **elements) # type: ignore 3036 3037 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3038 if not self._match(TokenType.GROUPING_SETS): 3039 return None 3040 3041 return self._parse_wrapped_csv(self._parse_grouping_set) 3042 3043 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3044 if self._match(TokenType.L_PAREN): 3045 grouping_set = self._parse_csv(self._parse_column) 3046 self._match_r_paren() 3047 return self.expression(exp.Tuple, expressions=grouping_set) 3048 3049 return self._parse_column() 3050 3051 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3052 if not skip_having_token and not self._match(TokenType.HAVING): 3053 return None 3054 return self.expression(exp.Having, this=self._parse_conjunction()) 3055 3056 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3057 if not self._match(TokenType.QUALIFY): 3058 return None 3059 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3060 3061 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3062 if skip_start_token: 3063 start = None 3064 elif self._match(TokenType.START_WITH): 3065 start = self._parse_conjunction() 3066 else: 3067 return None 3068 3069 self._match(TokenType.CONNECT_BY) 3070 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: 
self.expression( 3071 exp.Prior, this=self._parse_bitwise() 3072 ) 3073 connect = self._parse_conjunction() 3074 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3075 3076 if not start and self._match(TokenType.START_WITH): 3077 start = self._parse_conjunction() 3078 3079 return self.expression(exp.Connect, start=start, connect=connect) 3080 3081 def _parse_order( 3082 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3083 ) -> t.Optional[exp.Expression]: 3084 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3085 return this 3086 3087 return self.expression( 3088 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3089 ) 3090 3091 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3092 if not self._match(token): 3093 return None 3094 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3095 3096 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3097 this = parse_method() if parse_method else self._parse_conjunction() 3098 3099 asc = self._match(TokenType.ASC) 3100 desc = self._match(TokenType.DESC) or (asc and False) 3101 3102 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3103 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3104 3105 nulls_first = is_nulls_first or False 3106 explicitly_null_ordered = is_nulls_first or is_nulls_last 3107 3108 if ( 3109 not explicitly_null_ordered 3110 and ( 3111 (not desc and self.NULL_ORDERING == "nulls_are_small") 3112 or (desc and self.NULL_ORDERING != "nulls_are_small") 3113 ) 3114 and self.NULL_ORDERING != "nulls_are_last" 3115 ): 3116 nulls_first = True 3117 3118 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3119 3120 def _parse_limit( 3121 self, this: t.Optional[exp.Expression] = None, top: bool = False 3122 ) -> t.Optional[exp.Expression]: 3123 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3124 comments = 
self._prev_comments 3125 if top: 3126 limit_paren = self._match(TokenType.L_PAREN) 3127 expression = self._parse_number() 3128 3129 if limit_paren: 3130 self._match_r_paren() 3131 else: 3132 expression = self._parse_term() 3133 3134 if self._match(TokenType.COMMA): 3135 offset = expression 3136 expression = self._parse_term() 3137 else: 3138 offset = None 3139 3140 limit_exp = self.expression( 3141 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3142 ) 3143 3144 return limit_exp 3145 3146 if self._match(TokenType.FETCH): 3147 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3148 direction = self._prev.text if direction else "FIRST" 3149 3150 count = self._parse_field(tokens=self.FETCH_TOKENS) 3151 percent = self._match(TokenType.PERCENT) 3152 3153 self._match_set((TokenType.ROW, TokenType.ROWS)) 3154 3155 only = self._match_text_seq("ONLY") 3156 with_ties = self._match_text_seq("WITH", "TIES") 3157 3158 if only and with_ties: 3159 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3160 3161 return self.expression( 3162 exp.Fetch, 3163 direction=direction, 3164 count=count, 3165 percent=percent, 3166 with_ties=with_ties, 3167 ) 3168 3169 return this 3170 3171 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3172 if not self._match(TokenType.OFFSET): 3173 return this 3174 3175 count = self._parse_term() 3176 self._match_set((TokenType.ROW, TokenType.ROWS)) 3177 return self.expression(exp.Offset, this=this, expression=count) 3178 3179 def _parse_locks(self) -> t.List[exp.Lock]: 3180 locks = [] 3181 while True: 3182 if self._match_text_seq("FOR", "UPDATE"): 3183 update = True 3184 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3185 "LOCK", "IN", "SHARE", "MODE" 3186 ): 3187 update = False 3188 else: 3189 break 3190 3191 expressions = None 3192 if self._match_text_seq("OF"): 3193 expressions = self._parse_csv(lambda: 
self._parse_table(schema=True)) 3194 3195 wait: t.Optional[bool | exp.Expression] = None 3196 if self._match_text_seq("NOWAIT"): 3197 wait = True 3198 elif self._match_text_seq("WAIT"): 3199 wait = self._parse_primary() 3200 elif self._match_text_seq("SKIP", "LOCKED"): 3201 wait = False 3202 3203 locks.append( 3204 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3205 ) 3206 3207 return locks 3208 3209 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3210 if not self._match_set(self.SET_OPERATIONS): 3211 return this 3212 3213 token_type = self._prev.token_type 3214 3215 if token_type == TokenType.UNION: 3216 expression = exp.Union 3217 elif token_type == TokenType.EXCEPT: 3218 expression = exp.Except 3219 else: 3220 expression = exp.Intersect 3221 3222 return self.expression( 3223 expression, 3224 comments=self._prev.comments, 3225 this=this, 3226 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3227 by_name=self._match_text_seq("BY", "NAME"), 3228 expression=self._parse_set_operations(self._parse_select(nested=True)), 3229 ) 3230 3231 def _parse_expression(self) -> t.Optional[exp.Expression]: 3232 return self._parse_alias(self._parse_conjunction()) 3233 3234 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3235 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3236 3237 def _parse_equality(self) -> t.Optional[exp.Expression]: 3238 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3239 3240 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3241 return self._parse_tokens(self._parse_range, self.COMPARISON) 3242 3243 def _parse_range(self) -> t.Optional[exp.Expression]: 3244 this = self._parse_bitwise() 3245 negate = self._match(TokenType.NOT) 3246 3247 if self._match_set(self.RANGE_PARSERS): 3248 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3249 if not expression: 3250 return this 3251 3252 
this = expression 3253 elif self._match(TokenType.ISNULL): 3254 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3255 3256 # Postgres supports ISNULL and NOTNULL for conditions. 3257 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3258 if self._match(TokenType.NOTNULL): 3259 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3260 this = self.expression(exp.Not, this=this) 3261 3262 if negate: 3263 this = self.expression(exp.Not, this=this) 3264 3265 if self._match(TokenType.IS): 3266 this = self._parse_is(this) 3267 3268 return this 3269 3270 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3271 index = self._index - 1 3272 negate = self._match(TokenType.NOT) 3273 3274 if self._match_text_seq("DISTINCT", "FROM"): 3275 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3276 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3277 3278 expression = self._parse_null() or self._parse_boolean() 3279 if not expression: 3280 self._retreat(index) 3281 return None 3282 3283 this = self.expression(exp.Is, this=this, expression=expression) 3284 return self.expression(exp.Not, this=this) if negate else this 3285 3286 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3287 unnest = self._parse_unnest(with_alias=False) 3288 if unnest: 3289 this = self.expression(exp.In, this=this, unnest=unnest) 3290 elif self._match(TokenType.L_PAREN): 3291 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3292 3293 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3294 this = self.expression(exp.In, this=this, query=expressions[0]) 3295 else: 3296 this = self.expression(exp.In, this=this, expressions=expressions) 3297 3298 self._match_r_paren(this) 3299 else: 3300 this = self.expression(exp.In, this=this, field=self._parse_field()) 3301 3302 return this 3303 3304 def _parse_between(self, 
this: exp.Expression) -> exp.Between: 3305 low = self._parse_bitwise() 3306 self._match(TokenType.AND) 3307 high = self._parse_bitwise() 3308 return self.expression(exp.Between, this=this, low=low, high=high) 3309 3310 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3311 if not self._match(TokenType.ESCAPE): 3312 return this 3313 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3314 3315 def _parse_interval(self) -> t.Optional[exp.Interval]: 3316 index = self._index 3317 3318 if not self._match(TokenType.INTERVAL): 3319 return None 3320 3321 if self._match(TokenType.STRING, advance=False): 3322 this = self._parse_primary() 3323 else: 3324 this = self._parse_term() 3325 3326 if not this: 3327 self._retreat(index) 3328 return None 3329 3330 unit = self._parse_function() or self._parse_var(any_token=True) 3331 3332 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3333 # each INTERVAL expression into this canonical form so it's easy to transpile 3334 if this and this.is_number: 3335 this = exp.Literal.string(this.name) 3336 elif this and this.is_string: 3337 parts = this.name.split() 3338 3339 if len(parts) == 2: 3340 if unit: 3341 # This is not actually a unit, it's something else (e.g. 
a "window side") 3342 unit = None 3343 self._retreat(self._index - 1) 3344 3345 this = exp.Literal.string(parts[0]) 3346 unit = self.expression(exp.Var, this=parts[1]) 3347 3348 return self.expression(exp.Interval, this=this, unit=unit) 3349 3350 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3351 this = self._parse_term() 3352 3353 while True: 3354 if self._match_set(self.BITWISE): 3355 this = self.expression( 3356 self.BITWISE[self._prev.token_type], 3357 this=this, 3358 expression=self._parse_term(), 3359 ) 3360 elif self._match(TokenType.DQMARK): 3361 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3362 elif self._match_pair(TokenType.LT, TokenType.LT): 3363 this = self.expression( 3364 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3365 ) 3366 elif self._match_pair(TokenType.GT, TokenType.GT): 3367 this = self.expression( 3368 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3369 ) 3370 else: 3371 break 3372 3373 return this 3374 3375 def _parse_term(self) -> t.Optional[exp.Expression]: 3376 return self._parse_tokens(self._parse_factor, self.TERM) 3377 3378 def _parse_factor(self) -> t.Optional[exp.Expression]: 3379 return self._parse_tokens(self._parse_unary, self.FACTOR) 3380 3381 def _parse_unary(self) -> t.Optional[exp.Expression]: 3382 if self._match_set(self.UNARY_PARSERS): 3383 return self.UNARY_PARSERS[self._prev.token_type](self) 3384 return self._parse_at_time_zone(self._parse_type()) 3385 3386 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3387 interval = parse_interval and self._parse_interval() 3388 if interval: 3389 return interval 3390 3391 index = self._index 3392 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3393 this = self._parse_column() 3394 3395 if data_type: 3396 if isinstance(this, exp.Literal): 3397 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3398 if parser: 3399 return parser(self, this, 
data_type) 3400 return self.expression(exp.Cast, this=this, to=data_type) 3401 if not data_type.expressions: 3402 self._retreat(index) 3403 return self._parse_column() 3404 return self._parse_column_ops(data_type) 3405 3406 return this and self._parse_column_ops(this) 3407 3408 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3409 this = self._parse_type() 3410 if not this: 3411 return None 3412 3413 return self.expression( 3414 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3415 ) 3416 3417 def _parse_types( 3418 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3419 ) -> t.Optional[exp.Expression]: 3420 index = self._index 3421 3422 prefix = self._match_text_seq("SYSUDTLIB", ".") 3423 3424 if not self._match_set(self.TYPE_TOKENS): 3425 identifier = allow_identifiers and self._parse_id_var( 3426 any_token=False, tokens=(TokenType.VAR,) 3427 ) 3428 3429 if identifier: 3430 tokens = self._tokenizer.tokenize(identifier.name) 3431 3432 if len(tokens) != 1: 3433 self.raise_error("Unexpected identifier", self._prev) 3434 3435 if tokens[0].token_type in self.TYPE_TOKENS: 3436 self._prev = tokens[0] 3437 elif self.SUPPORTS_USER_DEFINED_TYPES: 3438 type_name = identifier.name 3439 3440 while self._match(TokenType.DOT): 3441 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3442 3443 return exp.DataType.build(type_name, udt=True) 3444 else: 3445 return None 3446 else: 3447 return None 3448 3449 type_token = self._prev.token_type 3450 3451 if type_token == TokenType.PSEUDO_TYPE: 3452 return self.expression(exp.PseudoType, this=self._prev.text) 3453 3454 if type_token == TokenType.OBJECT_IDENTIFIER: 3455 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3456 3457 nested = type_token in self.NESTED_TYPE_TOKENS 3458 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3459 expressions = None 3460 maybe_func = False 3461 3462 if self._match(TokenType.L_PAREN): 3463 if 
is_struct: 3464 expressions = self._parse_csv(self._parse_struct_types) 3465 elif nested: 3466 expressions = self._parse_csv( 3467 lambda: self._parse_types( 3468 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3469 ) 3470 ) 3471 elif type_token in self.ENUM_TYPE_TOKENS: 3472 expressions = self._parse_csv(self._parse_equality) 3473 else: 3474 expressions = self._parse_csv(self._parse_type_size) 3475 3476 if not expressions or not self._match(TokenType.R_PAREN): 3477 self._retreat(index) 3478 return None 3479 3480 maybe_func = True 3481 3482 this: t.Optional[exp.Expression] = None 3483 values: t.Optional[t.List[exp.Expression]] = None 3484 3485 if nested and self._match(TokenType.LT): 3486 if is_struct: 3487 expressions = self._parse_csv(self._parse_struct_types) 3488 else: 3489 expressions = self._parse_csv( 3490 lambda: self._parse_types( 3491 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3492 ) 3493 ) 3494 3495 if not self._match(TokenType.GT): 3496 self.raise_error("Expecting >") 3497 3498 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3499 values = self._parse_csv(self._parse_conjunction) 3500 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3501 3502 if type_token in self.TIMESTAMPS: 3503 if self._match_text_seq("WITH", "TIME", "ZONE"): 3504 maybe_func = False 3505 tz_type = ( 3506 exp.DataType.Type.TIMETZ 3507 if type_token in self.TIMES 3508 else exp.DataType.Type.TIMESTAMPTZ 3509 ) 3510 this = exp.DataType(this=tz_type, expressions=expressions) 3511 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3512 maybe_func = False 3513 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3514 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3515 maybe_func = False 3516 elif type_token == TokenType.INTERVAL: 3517 unit = self._parse_var() 3518 3519 if self._match_text_seq("TO"): 3520 span = [exp.IntervalSpan(this=unit, 
expression=self._parse_var())] 3521 else: 3522 span = None 3523 3524 if span or not unit: 3525 this = self.expression( 3526 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3527 ) 3528 else: 3529 this = self.expression(exp.Interval, unit=unit) 3530 3531 if maybe_func and check_func: 3532 index2 = self._index 3533 peek = self._parse_string() 3534 3535 if not peek: 3536 self._retreat(index) 3537 return None 3538 3539 self._retreat(index2) 3540 3541 if not this: 3542 if self._match_text_seq("UNSIGNED"): 3543 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3544 if not unsigned_type_token: 3545 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3546 3547 type_token = unsigned_type_token or type_token 3548 3549 this = exp.DataType( 3550 this=exp.DataType.Type[type_token.value], 3551 expressions=expressions, 3552 nested=nested, 3553 values=values, 3554 prefix=prefix, 3555 ) 3556 3557 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3558 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3559 3560 return this 3561 3562 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3563 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3564 self._match(TokenType.COLON) 3565 return self._parse_column_def(this) 3566 3567 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3568 if not self._match_text_seq("AT", "TIME", "ZONE"): 3569 return this 3570 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3571 3572 def _parse_column(self) -> t.Optional[exp.Expression]: 3573 this = self._parse_field() 3574 if isinstance(this, exp.Identifier): 3575 this = self.expression(exp.Column, this=this) 3576 elif not this: 3577 return self._parse_bracket(this) 3578 return self._parse_column_ops(this) 3579 3580 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3581 
this = self._parse_bracket(this) 3582 3583 while self._match_set(self.COLUMN_OPERATORS): 3584 op_token = self._prev.token_type 3585 op = self.COLUMN_OPERATORS.get(op_token) 3586 3587 if op_token == TokenType.DCOLON: 3588 field = self._parse_types() 3589 if not field: 3590 self.raise_error("Expected type") 3591 elif op and self._curr: 3592 self._advance() 3593 value = self._prev.text 3594 field = ( 3595 exp.Literal.number(value) 3596 if self._prev.token_type == TokenType.NUMBER 3597 else exp.Literal.string(value) 3598 ) 3599 else: 3600 field = self._parse_field(anonymous_func=True, any_token=True) 3601 3602 if isinstance(field, exp.Func): 3603 # bigquery allows function calls like x.y.count(...) 3604 # SAFE.SUBSTR(...) 3605 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3606 this = self._replace_columns_with_dots(this) 3607 3608 if op: 3609 this = op(self, this, field) 3610 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3611 this = self.expression( 3612 exp.Column, 3613 this=field, 3614 table=this.this, 3615 db=this.args.get("table"), 3616 catalog=this.args.get("db"), 3617 ) 3618 else: 3619 this = self.expression(exp.Dot, this=this, expression=field) 3620 this = self._parse_bracket(this) 3621 return this 3622 3623 def _parse_primary(self) -> t.Optional[exp.Expression]: 3624 if self._match_set(self.PRIMARY_PARSERS): 3625 token_type = self._prev.token_type 3626 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3627 3628 if token_type == TokenType.STRING: 3629 expressions = [primary] 3630 while self._match(TokenType.STRING): 3631 expressions.append(exp.Literal.string(self._prev.text)) 3632 3633 if len(expressions) > 1: 3634 return self.expression(exp.Concat, expressions=expressions) 3635 3636 return primary 3637 3638 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3639 return exp.Literal.number(f"0.{self._prev.text}") 3640 3641 if self._match(TokenType.L_PAREN): 3642 comments 
= self._prev_comments 3643 query = self._parse_select() 3644 3645 if query: 3646 expressions = [query] 3647 else: 3648 expressions = self._parse_expressions() 3649 3650 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3651 3652 if isinstance(this, exp.Subqueryable): 3653 this = self._parse_set_operations( 3654 self._parse_subquery(this=this, parse_alias=False) 3655 ) 3656 elif len(expressions) > 1: 3657 this = self.expression(exp.Tuple, expressions=expressions) 3658 else: 3659 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3660 3661 if this: 3662 this.add_comments(comments) 3663 3664 self._match_r_paren(expression=this) 3665 return this 3666 3667 return None 3668 3669 def _parse_field( 3670 self, 3671 any_token: bool = False, 3672 tokens: t.Optional[t.Collection[TokenType]] = None, 3673 anonymous_func: bool = False, 3674 ) -> t.Optional[exp.Expression]: 3675 return ( 3676 self._parse_primary() 3677 or self._parse_function(anonymous=anonymous_func) 3678 or self._parse_id_var(any_token=any_token, tokens=tokens) 3679 ) 3680 3681 def _parse_function( 3682 self, 3683 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3684 anonymous: bool = False, 3685 optional_parens: bool = True, 3686 ) -> t.Optional[exp.Expression]: 3687 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3688 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3689 fn_syntax = False 3690 if ( 3691 self._match(TokenType.L_BRACE, advance=False) 3692 and self._next 3693 and self._next.text.upper() == "FN" 3694 ): 3695 self._advance(2) 3696 fn_syntax = True 3697 3698 func = self._parse_function_call( 3699 functions=functions, anonymous=anonymous, optional_parens=optional_parens 3700 ) 3701 3702 if fn_syntax: 3703 self._match(TokenType.R_BRACE) 3704 3705 return func 3706 3707 def _parse_function_call( 3708 self, 3709 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3710 anonymous: bool = False, 3711 
optional_parens: bool = True, 3712 ) -> t.Optional[exp.Expression]: 3713 if not self._curr: 3714 return None 3715 3716 token_type = self._curr.token_type 3717 this = self._curr.text 3718 upper = this.upper() 3719 3720 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3721 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3722 self._advance() 3723 return parser(self) 3724 3725 if not self._next or self._next.token_type != TokenType.L_PAREN: 3726 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3727 self._advance() 3728 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3729 3730 return None 3731 3732 if token_type not in self.FUNC_TOKENS: 3733 return None 3734 3735 self._advance(2) 3736 3737 parser = self.FUNCTION_PARSERS.get(upper) 3738 if parser and not anonymous: 3739 this = parser(self) 3740 else: 3741 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3742 3743 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3744 this = self.expression(subquery_predicate, this=self._parse_select()) 3745 self._match_r_paren() 3746 return this 3747 3748 if functions is None: 3749 functions = self.FUNCTIONS 3750 3751 function = functions.get(upper) 3752 3753 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3754 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3755 3756 if function and not anonymous: 3757 func = self.validate_expression(function(args), args) 3758 if not self.NORMALIZE_FUNCTIONS: 3759 func.meta["name"] = this 3760 this = func 3761 else: 3762 this = self.expression(exp.Anonymous, this=this, expressions=args) 3763 3764 self._match_r_paren(this) 3765 return self._parse_window(this) 3766 3767 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3768 return self._parse_column_def(self._parse_id_var()) 3769 3770 def _parse_user_defined_function( 3771 self, kind: t.Optional[TokenType] = None 3772 ) -> t.Optional[exp.Expression]: 3773 this 
= self._parse_id_var() 3774 3775 while self._match(TokenType.DOT): 3776 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3777 3778 if not self._match(TokenType.L_PAREN): 3779 return this 3780 3781 expressions = self._parse_csv(self._parse_function_parameter) 3782 self._match_r_paren() 3783 return self.expression( 3784 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3785 ) 3786 3787 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3788 literal = self._parse_primary() 3789 if literal: 3790 return self.expression(exp.Introducer, this=token.text, expression=literal) 3791 3792 return self.expression(exp.Identifier, this=token.text) 3793 3794 def _parse_session_parameter(self) -> exp.SessionParameter: 3795 kind = None 3796 this = self._parse_id_var() or self._parse_primary() 3797 3798 if this and self._match(TokenType.DOT): 3799 kind = this.name 3800 this = self._parse_var() or self._parse_primary() 3801 3802 return self.expression(exp.SessionParameter, this=this, kind=kind) 3803 3804 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3805 index = self._index 3806 3807 if self._match(TokenType.L_PAREN): 3808 expressions = t.cast( 3809 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3810 ) 3811 3812 if not self._match(TokenType.R_PAREN): 3813 self._retreat(index) 3814 else: 3815 expressions = [self._parse_id_var()] 3816 3817 if self._match_set(self.LAMBDAS): 3818 return self.LAMBDAS[self._prev.token_type](self, expressions) 3819 3820 self._retreat(index) 3821 3822 this: t.Optional[exp.Expression] 3823 3824 if self._match(TokenType.DISTINCT): 3825 this = self.expression( 3826 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3827 ) 3828 else: 3829 this = self._parse_select_or_expression(alias=alias) 3830 3831 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3832 3833 def _parse_schema(self, 
this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3834 index = self._index 3835 3836 if not self.errors: 3837 try: 3838 if self._parse_select(nested=True): 3839 return this 3840 except ParseError: 3841 pass 3842 finally: 3843 self.errors.clear() 3844 self._retreat(index) 3845 3846 if not self._match(TokenType.L_PAREN): 3847 return this 3848 3849 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3850 3851 self._match_r_paren() 3852 return self.expression(exp.Schema, this=this, expressions=args) 3853 3854 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3855 return self._parse_column_def(self._parse_field(any_token=True)) 3856 3857 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3858 # column defs are not really columns, they're identifiers 3859 if isinstance(this, exp.Column): 3860 this = this.this 3861 3862 kind = self._parse_types(schema=True) 3863 3864 if self._match_text_seq("FOR", "ORDINALITY"): 3865 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3866 3867 constraints: t.List[exp.Expression] = [] 3868 3869 if not kind and self._match(TokenType.ALIAS): 3870 constraints.append( 3871 self.expression( 3872 exp.ComputedColumnConstraint, 3873 this=self._parse_conjunction(), 3874 persisted=self._match_text_seq("PERSISTED"), 3875 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3876 ) 3877 ) 3878 3879 while True: 3880 constraint = self._parse_column_constraint() 3881 if not constraint: 3882 break 3883 constraints.append(constraint) 3884 3885 if not kind and not constraints: 3886 return this 3887 3888 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3889 3890 def _parse_auto_increment( 3891 self, 3892 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3893 start = None 3894 increment = None 3895 3896 if self._match(TokenType.L_PAREN, advance=False): 3897 args = 
self._parse_wrapped_csv(self._parse_bitwise) 3898 start = seq_get(args, 0) 3899 increment = seq_get(args, 1) 3900 elif self._match_text_seq("START"): 3901 start = self._parse_bitwise() 3902 self._match_text_seq("INCREMENT") 3903 increment = self._parse_bitwise() 3904 3905 if start and increment: 3906 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3907 3908 return exp.AutoIncrementColumnConstraint() 3909 3910 def _parse_compress(self) -> exp.CompressColumnConstraint: 3911 if self._match(TokenType.L_PAREN, advance=False): 3912 return self.expression( 3913 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3914 ) 3915 3916 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3917 3918 def _parse_generated_as_identity( 3919 self, 3920 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint: 3921 if self._match_text_seq("BY", "DEFAULT"): 3922 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3923 this = self.expression( 3924 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3925 ) 3926 else: 3927 self._match_text_seq("ALWAYS") 3928 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3929 3930 self._match(TokenType.ALIAS) 3931 identity = self._match_text_seq("IDENTITY") 3932 3933 if self._match(TokenType.L_PAREN): 3934 if self._match(TokenType.START_WITH): 3935 this.set("start", self._parse_bitwise()) 3936 if self._match_text_seq("INCREMENT", "BY"): 3937 this.set("increment", self._parse_bitwise()) 3938 if self._match_text_seq("MINVALUE"): 3939 this.set("minvalue", self._parse_bitwise()) 3940 if self._match_text_seq("MAXVALUE"): 3941 this.set("maxvalue", self._parse_bitwise()) 3942 3943 if self._match_text_seq("CYCLE"): 3944 this.set("cycle", True) 3945 elif self._match_text_seq("NO", "CYCLE"): 3946 this.set("cycle", False) 3947 3948 if not identity: 3949 this.set("expression", self._parse_bitwise()) 3950 elif not 
this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 3951 args = self._parse_csv(self._parse_bitwise) 3952 this.set("start", seq_get(args, 0)) 3953 this.set("increment", seq_get(args, 1)) 3954 3955 self._match_r_paren() 3956 3957 return this 3958 3959 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3960 self._match_text_seq("LENGTH") 3961 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3962 3963 def _parse_not_constraint( 3964 self, 3965 ) -> t.Optional[exp.Expression]: 3966 if self._match_text_seq("NULL"): 3967 return self.expression(exp.NotNullColumnConstraint) 3968 if self._match_text_seq("CASESPECIFIC"): 3969 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3970 if self._match_text_seq("FOR", "REPLICATION"): 3971 return self.expression(exp.NotForReplicationColumnConstraint) 3972 return None 3973 3974 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3975 if self._match(TokenType.CONSTRAINT): 3976 this = self._parse_id_var() 3977 else: 3978 this = None 3979 3980 if self._match_texts(self.CONSTRAINT_PARSERS): 3981 return self.expression( 3982 exp.ColumnConstraint, 3983 this=this, 3984 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3985 ) 3986 3987 return this 3988 3989 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3990 if not self._match(TokenType.CONSTRAINT): 3991 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3992 3993 this = self._parse_id_var() 3994 expressions = [] 3995 3996 while True: 3997 constraint = self._parse_unnamed_constraint() or self._parse_function() 3998 if not constraint: 3999 break 4000 expressions.append(constraint) 4001 4002 return self.expression(exp.Constraint, this=this, expressions=expressions) 4003 4004 def _parse_unnamed_constraint( 4005 self, constraints: t.Optional[t.Collection[str]] = None 4006 ) -> t.Optional[exp.Expression]: 4007 if self._match(TokenType.IDENTIFIER, 
advance=False) or not self._match_texts( 4008 constraints or self.CONSTRAINT_PARSERS 4009 ): 4010 return None 4011 4012 constraint = self._prev.text.upper() 4013 if constraint not in self.CONSTRAINT_PARSERS: 4014 self.raise_error(f"No parser found for schema constraint {constraint}.") 4015 4016 return self.CONSTRAINT_PARSERS[constraint](self) 4017 4018 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4019 self._match_text_seq("KEY") 4020 return self.expression( 4021 exp.UniqueColumnConstraint, 4022 this=self._parse_schema(self._parse_id_var(any_token=False)), 4023 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4024 ) 4025 4026 def _parse_key_constraint_options(self) -> t.List[str]: 4027 options = [] 4028 while True: 4029 if not self._curr: 4030 break 4031 4032 if self._match(TokenType.ON): 4033 action = None 4034 on = self._advance_any() and self._prev.text 4035 4036 if self._match_text_seq("NO", "ACTION"): 4037 action = "NO ACTION" 4038 elif self._match_text_seq("CASCADE"): 4039 action = "CASCADE" 4040 elif self._match_text_seq("RESTRICT"): 4041 action = "RESTRICT" 4042 elif self._match_pair(TokenType.SET, TokenType.NULL): 4043 action = "SET NULL" 4044 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4045 action = "SET DEFAULT" 4046 else: 4047 self.raise_error("Invalid key constraint") 4048 4049 options.append(f"ON {on} {action}") 4050 elif self._match_text_seq("NOT", "ENFORCED"): 4051 options.append("NOT ENFORCED") 4052 elif self._match_text_seq("DEFERRABLE"): 4053 options.append("DEFERRABLE") 4054 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4055 options.append("INITIALLY DEFERRED") 4056 elif self._match_text_seq("NORELY"): 4057 options.append("NORELY") 4058 elif self._match_text_seq("MATCH", "FULL"): 4059 options.append("MATCH FULL") 4060 else: 4061 break 4062 4063 return options 4064 4065 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4066 if match and not 
self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE|UPDATE <action>]*."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token is taken verbatim as the action.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        # Hook so dialects can override what a PRIMARY KEY part may be.
        return self._parse_field()

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY either as a column constraint or as a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # Without a parenthesized column list this is a column-level constraint.
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] / {...} suffixes: subscripts, slices, arrays, and DuckDB structs."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. x[:2].
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Plain subscript: normalize indexes by the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Brackets can be chained, e.g. x[0][1] - recurse to pick up the rest.
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not
self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        # A CASE expression may carry an OVER clause, so try to parse a window around it.
        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse both IF(cond, true[, false]) and IF cond THEN ... [ELSE ...] END forms."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all - rewind to before the IF token.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]."""
        if not self._match_text_seq("VALUE", "FOR"):
            # NEXT was already consumed by the caller; undo that advance.
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); a comma is tolerated instead of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING MAX|MIN expr])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST/TRY_CAST: expr AS type [FORMAT fmt]."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # FORMAT with a temporal target type becomes STR_TO_DATE / STR_TO_TIME.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat it as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g.
Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT_WS(separator, value, ...)."""
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style calls, including WITHIN GROUP ordering."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search values must also match when both sides are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of trailing args means the last one is the default.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse [KEY] key {:|,} [VALUE] value, as used inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wraps `this` when a trailing FORMAT JSON clause is present.
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e.
NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse the interior of JSON_OBJECT(...), including NULL/UNIQUE KEYS handling."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse COLUMNS (<json column defs>)."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse the interior of JSON_TABLE(...)."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH (cols) AGAINST ('text' [modifier]) - MySQL full-text search."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this =
self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/STRPOS arguments; supports both `needle IN haystack` and CSV forms."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT(MODEL <table>, TABLE <table> [, params])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # After FROM the pattern precedes the target string, so swap them back.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the FILTER / WITHIN GROUP / OVER clauses that may follow `this`."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this =
self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> - a reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus an optional side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or parenthesized alias list) following `this`."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            # `explicit=True` requires the AS keyword to be present.
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name; with `any_token`, most non-reserved tokens qualify."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return
self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        # Advance over any non-reserved token and return it, else None.
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces with a `:` qualifier."""

        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The matched token wasn't actually a placeholder - back up one token.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a SELECT * EXCEPT (cols) / EXCEPT col clause."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a SELECT * REPLACE (exprs) / REPLACE expr clause."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list, using `parse_method` for each item."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result =
parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators from `expressions` over operands from `parse_method`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; `optional=True` tolerates their absence."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START TRANSACTION with optional comma-separated modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK with optional TO SAVEPOINT and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST|AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    #
https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5017 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5018 return self.expression( 5019 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5020 ) 5021 5022 def _parse_add_constraint(self) -> exp.AddConstraint: 5023 this = None 5024 kind = self._prev.token_type 5025 5026 if kind == TokenType.CONSTRAINT: 5027 this = self._parse_id_var() 5028 5029 if self._match_text_seq("CHECK"): 5030 expression = self._parse_wrapped(self._parse_conjunction) 5031 enforced = self._match_text_seq("ENFORCED") 5032 5033 return self.expression( 5034 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5035 ) 5036 5037 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5038 expression = self._parse_foreign_key() 5039 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5040 expression = self._parse_primary_key() 5041 else: 5042 expression = None 5043 5044 return self.expression(exp.AddConstraint, this=this, expression=expression) 5045 5046 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5047 index = self._index - 1 5048 5049 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5050 return self._parse_csv(self._parse_add_constraint) 5051 5052 self._retreat(index) 5053 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 5054 return self._parse_csv(self._parse_field_def) 5055 5056 return self._parse_csv(self._parse_add_column) 5057 5058 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5059 self._match(TokenType.COLUMN) 5060 column = self._parse_field(any_token=True) 5061 5062 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5063 return self.expression(exp.AlterColumn, this=column, drop=True) 5064 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5065 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 
5066 5067 self._match_text_seq("SET", "DATA") 5068 return self.expression( 5069 exp.AlterColumn, 5070 this=column, 5071 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5072 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5073 using=self._match(TokenType.USING) and self._parse_conjunction(), 5074 ) 5075 5076 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5077 index = self._index - 1 5078 5079 partition_exists = self._parse_exists() 5080 if self._match(TokenType.PARTITION, advance=False): 5081 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5082 5083 self._retreat(index) 5084 return self._parse_csv(self._parse_drop_column) 5085 5086 def _parse_alter_table_rename(self) -> exp.RenameTable: 5087 self._match_text_seq("TO") 5088 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5089 5090 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5091 start = self._prev 5092 5093 if not self._match(TokenType.TABLE): 5094 return self._parse_as_command(start) 5095 5096 exists = self._parse_exists() 5097 only = self._match_text_seq("ONLY") 5098 this = self._parse_table(schema=True) 5099 5100 if self._next: 5101 self._advance() 5102 5103 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5104 if parser: 5105 actions = ensure_list(parser(self)) 5106 5107 if not self._curr: 5108 return self.expression( 5109 exp.AlterTable, 5110 this=this, 5111 exists=exists, 5112 actions=actions, 5113 only=only, 5114 ) 5115 5116 return self._parse_as_command(start) 5117 5118 def _parse_merge(self) -> exp.Merge: 5119 self._match(TokenType.INTO) 5120 target = self._parse_table() 5121 5122 if target and self._match(TokenType.ALIAS, advance=False): 5123 target.set("alias", self._parse_table_alias()) 5124 5125 self._match(TokenType.USING) 5126 using = self._parse_table() 5127 5128 self._match(TokenType.ON) 5129 on = self._parse_conjunction() 5130 5131 return 
self.expression( 5132 exp.Merge, 5133 this=target, 5134 using=using, 5135 on=on, 5136 expressions=self._parse_when_matched(), 5137 ) 5138 5139 def _parse_when_matched(self) -> t.List[exp.When]: 5140 whens = [] 5141 5142 while self._match(TokenType.WHEN): 5143 matched = not self._match(TokenType.NOT) 5144 self._match_text_seq("MATCHED") 5145 source = ( 5146 False 5147 if self._match_text_seq("BY", "TARGET") 5148 else self._match_text_seq("BY", "SOURCE") 5149 ) 5150 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5151 5152 self._match(TokenType.THEN) 5153 5154 if self._match(TokenType.INSERT): 5155 _this = self._parse_star() 5156 if _this: 5157 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5158 else: 5159 then = self.expression( 5160 exp.Insert, 5161 this=self._parse_value(), 5162 expression=self._match(TokenType.VALUES) and self._parse_value(), 5163 ) 5164 elif self._match(TokenType.UPDATE): 5165 expressions = self._parse_star() 5166 if expressions: 5167 then = self.expression(exp.Update, expressions=expressions) 5168 else: 5169 then = self.expression( 5170 exp.Update, 5171 expressions=self._match(TokenType.SET) 5172 and self._parse_csv(self._parse_equality), 5173 ) 5174 elif self._match(TokenType.DELETE): 5175 then = self.expression(exp.Var, this=self._prev.text) 5176 else: 5177 then = None 5178 5179 whens.append( 5180 self.expression( 5181 exp.When, 5182 matched=matched, 5183 source=source, 5184 condition=condition, 5185 then=then, 5186 ) 5187 ) 5188 return whens 5189 5190 def _parse_show(self) -> t.Optional[exp.Expression]: 5191 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5192 if parser: 5193 return parser(self) 5194 return self._parse_as_command(self._prev) 5195 5196 def _parse_set_item_assignment( 5197 self, kind: t.Optional[str] = None 5198 ) -> t.Optional[exp.Expression]: 5199 index = self._index 5200 5201 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 5202 
return self._parse_set_transaction(global_=kind == "GLOBAL") 5203 5204 left = self._parse_primary() or self._parse_id_var() 5205 assignment_delimiter = self._match_texts(("=", "TO")) 5206 5207 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5208 self._retreat(index) 5209 return None 5210 5211 right = self._parse_statement() or self._parse_id_var() 5212 this = self.expression(exp.EQ, this=left, expression=right) 5213 5214 return self.expression(exp.SetItem, this=this, kind=kind) 5215 5216 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5217 self._match_text_seq("TRANSACTION") 5218 characteristics = self._parse_csv( 5219 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5220 ) 5221 return self.expression( 5222 exp.SetItem, 5223 expressions=characteristics, 5224 kind="TRANSACTION", 5225 **{"global": global_}, # type: ignore 5226 ) 5227 5228 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5229 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5230 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5231 5232 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5233 index = self._index 5234 set_ = self.expression( 5235 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5236 ) 5237 5238 if self._curr: 5239 self._retreat(index) 5240 return self._parse_as_command(self._prev) 5241 5242 return set_ 5243 5244 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5245 for option in options: 5246 if self._match_text_seq(*option.split(" ")): 5247 return exp.var(option) 5248 return None 5249 5250 def _parse_as_command(self, start: Token) -> exp.Command: 5251 while self._curr: 5252 self._advance() 5253 text = self._find_sql(start, self._prev) 5254 size = len(start.text) 5255 return exp.Command(this=text[:size], expression=text[size:]) 5256 5257 def 
_parse_dict_property(self, this: str) -> exp.DictProperty: 5258 settings = [] 5259 5260 self._match_l_paren() 5261 kind = self._parse_id_var() 5262 5263 if self._match(TokenType.L_PAREN): 5264 while True: 5265 key = self._parse_id_var() 5266 value = self._parse_primary() 5267 5268 if not key and value is None: 5269 break 5270 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5271 self._match(TokenType.R_PAREN) 5272 5273 self._match_r_paren() 5274 5275 return self.expression( 5276 exp.DictProperty, 5277 this=this, 5278 kind=kind.this if kind else None, 5279 settings=settings, 5280 ) 5281 5282 def _parse_dict_range(self, this: str) -> exp.DictRange: 5283 self._match_l_paren() 5284 has_min = self._match_text_seq("MIN") 5285 if has_min: 5286 min = self._parse_var() or self._parse_primary() 5287 self._match_text_seq("MAX") 5288 max = self._parse_var() or self._parse_primary() 5289 else: 5290 max = self._parse_var() or self._parse_primary() 5291 min = exp.Literal.number(0) 5292 self._match_r_paren() 5293 return self.expression(exp.DictRange, this=this, min=min, max=max) 5294 5295 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5296 index = self._index 5297 expression = self._parse_column() 5298 if not self._match(TokenType.IN): 5299 self._retreat(index - 1) 5300 return None 5301 iterator = self._parse_column() 5302 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5303 return self.expression( 5304 exp.Comprehension, 5305 this=this, 5306 expression=expression, 5307 iterator=iterator, 5308 condition=condition, 5309 ) 5310 5311 def _find_parser( 5312 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5313 ) -> t.Optional[t.Callable]: 5314 if not self._curr: 5315 return None 5316 5317 index = self._index 5318 this = [] 5319 while True: 5320 # The current token might be multiple words 5321 curr = self._curr.text.upper() 5322 key = curr.split(" ") 5323 this.append(curr) 5324 5325 
self._advance() 5326 result, trie = in_trie(trie, key) 5327 if result == TrieResult.FAILED: 5328 break 5329 5330 if result == TrieResult.EXISTS: 5331 subparser = parsers[" ".join(this)] 5332 return subparser 5333 5334 self._retreat(index) 5335 return None 5336 5337 def _match(self, token_type, advance=True, expression=None): 5338 if not self._curr: 5339 return None 5340 5341 if self._curr.token_type == token_type: 5342 if advance: 5343 self._advance() 5344 self._add_comments(expression) 5345 return True 5346 5347 return None 5348 5349 def _match_set(self, types, advance=True): 5350 if not self._curr: 5351 return None 5352 5353 if self._curr.token_type in types: 5354 if advance: 5355 self._advance() 5356 return True 5357 5358 return None 5359 5360 def _match_pair(self, token_type_a, token_type_b, advance=True): 5361 if not self._curr or not self._next: 5362 return None 5363 5364 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5365 if advance: 5366 self._advance(2) 5367 return True 5368 5369 return None 5370 5371 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5372 if not self._match(TokenType.L_PAREN, expression=expression): 5373 self.raise_error("Expecting (") 5374 5375 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5376 if not self._match(TokenType.R_PAREN, expression=expression): 5377 self.raise_error("Expecting )") 5378 5379 def _match_texts(self, texts, advance=True): 5380 if self._curr and self._curr.text.upper() in texts: 5381 if advance: 5382 self._advance() 5383 return True 5384 return False 5385 5386 def _match_text_seq(self, *texts, advance=True): 5387 index = self._index 5388 for text in texts: 5389 if self._curr and self._curr.text.upper() == text: 5390 self._advance() 5391 else: 5392 self._retreat(index) 5393 return False 5394 5395 if not advance: 5396 self._retreat(index) 5397 5398 return True 5399 5400 @t.overload 5401 def 
_replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5402 ... 5403 5404 @t.overload 5405 def _replace_columns_with_dots( 5406 self, this: t.Optional[exp.Expression] 5407 ) -> t.Optional[exp.Expression]: 5408 ... 5409 5410 def _replace_columns_with_dots(self, this): 5411 if isinstance(this, exp.Dot): 5412 exp.replace_children(this, self._replace_columns_with_dots) 5413 elif isinstance(this, exp.Column): 5414 exp.replace_children(this, self._replace_columns_with_dots) 5415 table = this.args.get("table") 5416 this = ( 5417 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5418 ) 5419 5420 return this 5421 5422 def _replace_lambda( 5423 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5424 ) -> t.Optional[exp.Expression]: 5425 if not node: 5426 return node 5427 5428 for column in node.find_all(exp.Column): 5429 if column.parts[0].name in lambda_variables: 5430 dot_or_id = column.to_dot() if column.table else column.this 5431 parent = column.parent 5432 5433 while isinstance(parent, exp.Dot): 5434 if not isinstance(parent.parent, exp.Dot): 5435 parent.replace(dot_or_id) 5436 break 5437 parent = parent.parent 5438 else: 5439 if column is node: 5440 node = dot_or_id 5441 else: 5442 column.replace(dot_or_id) 5443 return node 5444 5445 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5446 return [ 5447 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5448 for value in values 5449 if value 5450 ]
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from a flat [key, value, key, value, ...] list.

    A single star argument produces a StarMap instead; otherwise even-indexed
    arguments become keys and odd-indexed arguments their values.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Split the flat argument list into parallel key/value sequences.
    keys = [args[index] for index in range(0, len(args), 2)]
    values = [args[index + 1] for index in range(0, len(args), 2)]

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 
TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMP_S, 165 TokenType.TIMESTAMP_MS, 166 TokenType.TIMESTAMP_NS, 167 TokenType.TIMESTAMPTZ, 168 TokenType.TIMESTAMPLTZ, 169 TokenType.DATETIME, 170 TokenType.DATETIME64, 171 TokenType.DATE, 172 TokenType.INT4RANGE, 173 TokenType.INT4MULTIRANGE, 174 TokenType.INT8RANGE, 175 TokenType.INT8MULTIRANGE, 176 TokenType.NUMRANGE, 177 TokenType.NUMMULTIRANGE, 178 TokenType.TSRANGE, 179 TokenType.TSMULTIRANGE, 180 TokenType.TSTZRANGE, 181 TokenType.TSTZMULTIRANGE, 182 TokenType.DATERANGE, 183 TokenType.DATEMULTIRANGE, 184 TokenType.DECIMAL, 185 TokenType.UDECIMAL, 186 TokenType.BIGDECIMAL, 187 TokenType.UUID, 188 TokenType.GEOGRAPHY, 189 TokenType.GEOMETRY, 190 TokenType.HLLSKETCH, 191 TokenType.HSTORE, 192 TokenType.PSEUDO_TYPE, 193 TokenType.SUPER, 194 TokenType.SERIAL, 195 TokenType.SMALLSERIAL, 196 TokenType.BIGSERIAL, 197 TokenType.XML, 198 TokenType.YEAR, 199 TokenType.UNIQUEIDENTIFIER, 200 TokenType.USERDEFINED, 201 TokenType.MONEY, 202 TokenType.SMALLMONEY, 203 TokenType.ROWVERSION, 204 TokenType.IMAGE, 205 TokenType.VARIANT, 206 TokenType.OBJECT, 207 TokenType.OBJECT_IDENTIFIER, 208 TokenType.INET, 209 TokenType.IPADDRESS, 210 
TokenType.IPPREFIX, 211 TokenType.UNKNOWN, 212 TokenType.NULL, 213 *ENUM_TYPE_TOKENS, 214 *NESTED_TYPE_TOKENS, 215 } 216 217 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 218 TokenType.BIGINT: TokenType.UBIGINT, 219 TokenType.INT: TokenType.UINT, 220 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 221 TokenType.SMALLINT: TokenType.USMALLINT, 222 TokenType.TINYINT: TokenType.UTINYINT, 223 TokenType.DECIMAL: TokenType.UDECIMAL, 224 } 225 226 SUBQUERY_PREDICATES = { 227 TokenType.ANY: exp.Any, 228 TokenType.ALL: exp.All, 229 TokenType.EXISTS: exp.Exists, 230 TokenType.SOME: exp.Any, 231 } 232 233 RESERVED_KEYWORDS = { 234 *Tokenizer.SINGLE_TOKENS.values(), 235 TokenType.SELECT, 236 } 237 238 DB_CREATABLES = { 239 TokenType.DATABASE, 240 TokenType.SCHEMA, 241 TokenType.TABLE, 242 TokenType.VIEW, 243 TokenType.MODEL, 244 TokenType.DICTIONARY, 245 } 246 247 CREATABLES = { 248 TokenType.COLUMN, 249 TokenType.FUNCTION, 250 TokenType.INDEX, 251 TokenType.PROCEDURE, 252 *DB_CREATABLES, 253 } 254 255 # Tokens that can represent identifiers 256 ID_VAR_TOKENS = { 257 TokenType.VAR, 258 TokenType.ANTI, 259 TokenType.APPLY, 260 TokenType.ASC, 261 TokenType.AUTO_INCREMENT, 262 TokenType.BEGIN, 263 TokenType.CACHE, 264 TokenType.CASE, 265 TokenType.COLLATE, 266 TokenType.COMMAND, 267 TokenType.COMMENT, 268 TokenType.COMMIT, 269 TokenType.CONSTRAINT, 270 TokenType.DEFAULT, 271 TokenType.DELETE, 272 TokenType.DESC, 273 TokenType.DESCRIBE, 274 TokenType.DICTIONARY, 275 TokenType.DIV, 276 TokenType.END, 277 TokenType.EXECUTE, 278 TokenType.ESCAPE, 279 TokenType.FALSE, 280 TokenType.FIRST, 281 TokenType.FILTER, 282 TokenType.FORMAT, 283 TokenType.FULL, 284 TokenType.IS, 285 TokenType.ISNULL, 286 TokenType.INTERVAL, 287 TokenType.KEEP, 288 TokenType.KILL, 289 TokenType.LEFT, 290 TokenType.LOAD, 291 TokenType.MERGE, 292 TokenType.NATURAL, 293 TokenType.NEXT, 294 TokenType.OFFSET, 295 TokenType.ORDINALITY, 296 TokenType.OVERLAPS, 297 TokenType.OVERWRITE, 298 TokenType.PARTITION, 299 TokenType.PERCENT, 300 
TokenType.PIVOT, 301 TokenType.PRAGMA, 302 TokenType.RANGE, 303 TokenType.REFERENCES, 304 TokenType.RIGHT, 305 TokenType.ROW, 306 TokenType.ROWS, 307 TokenType.SEMI, 308 TokenType.SET, 309 TokenType.SETTINGS, 310 TokenType.SHOW, 311 TokenType.TEMPORARY, 312 TokenType.TOP, 313 TokenType.TRUE, 314 TokenType.UNIQUE, 315 TokenType.UNPIVOT, 316 TokenType.UPDATE, 317 TokenType.USE, 318 TokenType.VOLATILE, 319 TokenType.WINDOW, 320 *CREATABLES, 321 *SUBQUERY_PREDICATES, 322 *TYPE_TOKENS, 323 *NO_PAREN_FUNCTIONS, 324 } 325 326 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 327 328 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 329 TokenType.ANTI, 330 TokenType.APPLY, 331 TokenType.ASOF, 332 TokenType.FULL, 333 TokenType.LEFT, 334 TokenType.LOCK, 335 TokenType.NATURAL, 336 TokenType.OFFSET, 337 TokenType.RIGHT, 338 TokenType.SEMI, 339 TokenType.WINDOW, 340 } 341 342 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 343 344 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 345 346 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 347 348 FUNC_TOKENS = { 349 TokenType.COLLATE, 350 TokenType.COMMAND, 351 TokenType.CURRENT_DATE, 352 TokenType.CURRENT_DATETIME, 353 TokenType.CURRENT_TIMESTAMP, 354 TokenType.CURRENT_TIME, 355 TokenType.CURRENT_USER, 356 TokenType.FILTER, 357 TokenType.FIRST, 358 TokenType.FORMAT, 359 TokenType.GLOB, 360 TokenType.IDENTIFIER, 361 TokenType.INDEX, 362 TokenType.ISNULL, 363 TokenType.ILIKE, 364 TokenType.INSERT, 365 TokenType.LIKE, 366 TokenType.MERGE, 367 TokenType.OFFSET, 368 TokenType.PRIMARY_KEY, 369 TokenType.RANGE, 370 TokenType.REPLACE, 371 TokenType.RLIKE, 372 TokenType.ROW, 373 TokenType.UNNEST, 374 TokenType.VAR, 375 TokenType.LEFT, 376 TokenType.RIGHT, 377 TokenType.DATE, 378 TokenType.DATETIME, 379 TokenType.TABLE, 380 TokenType.TIMESTAMP, 381 TokenType.TIMESTAMPTZ, 382 TokenType.WINDOW, 383 TokenType.XOR, 384 *TYPE_TOKENS, 385 *SUBQUERY_PREDICATES, 386 } 387 388 CONJUNCTION = { 389 TokenType.AND: exp.And, 390 TokenType.OR: 
exp.Or, 391 } 392 393 EQUALITY = { 394 TokenType.EQ: exp.EQ, 395 TokenType.NEQ: exp.NEQ, 396 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 397 } 398 399 COMPARISON = { 400 TokenType.GT: exp.GT, 401 TokenType.GTE: exp.GTE, 402 TokenType.LT: exp.LT, 403 TokenType.LTE: exp.LTE, 404 } 405 406 BITWISE = { 407 TokenType.AMP: exp.BitwiseAnd, 408 TokenType.CARET: exp.BitwiseXor, 409 TokenType.PIPE: exp.BitwiseOr, 410 TokenType.DPIPE: exp.DPipe, 411 } 412 413 TERM = { 414 TokenType.DASH: exp.Sub, 415 TokenType.PLUS: exp.Add, 416 TokenType.MOD: exp.Mod, 417 TokenType.COLLATE: exp.Collate, 418 } 419 420 FACTOR = { 421 TokenType.DIV: exp.IntDiv, 422 TokenType.LR_ARROW: exp.Distance, 423 TokenType.SLASH: exp.Div, 424 TokenType.STAR: exp.Mul, 425 } 426 427 TIMES = { 428 TokenType.TIME, 429 TokenType.TIMETZ, 430 } 431 432 TIMESTAMPS = { 433 TokenType.TIMESTAMP, 434 TokenType.TIMESTAMPTZ, 435 TokenType.TIMESTAMPLTZ, 436 *TIMES, 437 } 438 439 SET_OPERATIONS = { 440 TokenType.UNION, 441 TokenType.INTERSECT, 442 TokenType.EXCEPT, 443 } 444 445 JOIN_METHODS = { 446 TokenType.NATURAL, 447 TokenType.ASOF, 448 } 449 450 JOIN_SIDES = { 451 TokenType.LEFT, 452 TokenType.RIGHT, 453 TokenType.FULL, 454 } 455 456 JOIN_KINDS = { 457 TokenType.INNER, 458 TokenType.OUTER, 459 TokenType.CROSS, 460 TokenType.SEMI, 461 TokenType.ANTI, 462 } 463 464 JOIN_HINTS: t.Set[str] = set() 465 466 LAMBDAS = { 467 TokenType.ARROW: lambda self, expressions: self.expression( 468 exp.Lambda, 469 this=self._replace_lambda( 470 self._parse_conjunction(), 471 {node.name for node in expressions}, 472 ), 473 expressions=expressions, 474 ), 475 TokenType.FARROW: lambda self, expressions: self.expression( 476 exp.Kwarg, 477 this=exp.var(expressions[0].name), 478 expression=self._parse_conjunction(), 479 ), 480 } 481 482 COLUMN_OPERATORS = { 483 TokenType.DOT: None, 484 TokenType.DCOLON: lambda self, this, to: self.expression( 485 exp.Cast if self.STRICT_CAST else exp.TryCast, 486 this=this, 487 to=to, 488 ), 489 
TokenType.ARROW: lambda self, this, path: self.expression( 490 exp.JSONExtract, 491 this=this, 492 expression=path, 493 ), 494 TokenType.DARROW: lambda self, this, path: self.expression( 495 exp.JSONExtractScalar, 496 this=this, 497 expression=path, 498 ), 499 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 500 exp.JSONBExtract, 501 this=this, 502 expression=path, 503 ), 504 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 505 exp.JSONBExtractScalar, 506 this=this, 507 expression=path, 508 ), 509 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 510 exp.JSONBContains, 511 this=this, 512 expression=key, 513 ), 514 } 515 516 EXPRESSION_PARSERS = { 517 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 518 exp.Column: lambda self: self._parse_column(), 519 exp.Condition: lambda self: self._parse_conjunction(), 520 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 521 exp.Expression: lambda self: self._parse_statement(), 522 exp.From: lambda self: self._parse_from(), 523 exp.Group: lambda self: self._parse_group(), 524 exp.Having: lambda self: self._parse_having(), 525 exp.Identifier: lambda self: self._parse_id_var(), 526 exp.Join: lambda self: self._parse_join(), 527 exp.Lambda: lambda self: self._parse_lambda(), 528 exp.Lateral: lambda self: self._parse_lateral(), 529 exp.Limit: lambda self: self._parse_limit(), 530 exp.Offset: lambda self: self._parse_offset(), 531 exp.Order: lambda self: self._parse_order(), 532 exp.Ordered: lambda self: self._parse_ordered(), 533 exp.Properties: lambda self: self._parse_properties(), 534 exp.Qualify: lambda self: self._parse_qualify(), 535 exp.Returning: lambda self: self._parse_returning(), 536 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 537 exp.Table: lambda self: self._parse_table_parts(), 538 exp.TableAlias: lambda self: self._parse_table_alias(), 539 exp.Where: lambda self: self._parse_where(), 540 
exp.Window: lambda self: self._parse_named_window(), 541 exp.With: lambda self: self._parse_with(), 542 "JOIN_TYPE": lambda self: self._parse_join_parts(), 543 } 544 545 STATEMENT_PARSERS = { 546 TokenType.ALTER: lambda self: self._parse_alter(), 547 TokenType.BEGIN: lambda self: self._parse_transaction(), 548 TokenType.CACHE: lambda self: self._parse_cache(), 549 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 550 TokenType.COMMENT: lambda self: self._parse_comment(), 551 TokenType.CREATE: lambda self: self._parse_create(), 552 TokenType.DELETE: lambda self: self._parse_delete(), 553 TokenType.DESC: lambda self: self._parse_describe(), 554 TokenType.DESCRIBE: lambda self: self._parse_describe(), 555 TokenType.DROP: lambda self: self._parse_drop(), 556 TokenType.INSERT: lambda self: self._parse_insert(), 557 TokenType.KILL: lambda self: self._parse_kill(), 558 TokenType.LOAD: lambda self: self._parse_load(), 559 TokenType.MERGE: lambda self: self._parse_merge(), 560 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 561 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 562 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 563 TokenType.SET: lambda self: self._parse_set(), 564 TokenType.UNCACHE: lambda self: self._parse_uncache(), 565 TokenType.UPDATE: lambda self: self._parse_update(), 566 TokenType.USE: lambda self: self.expression( 567 exp.Use, 568 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 569 and exp.var(self._prev.text), 570 this=self._parse_table(schema=False), 571 ), 572 } 573 574 UNARY_PARSERS = { 575 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 576 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 577 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 578 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 579 } 
580 581 PRIMARY_PARSERS = { 582 TokenType.STRING: lambda self, token: self.expression( 583 exp.Literal, this=token.text, is_string=True 584 ), 585 TokenType.NUMBER: lambda self, token: self.expression( 586 exp.Literal, this=token.text, is_string=False 587 ), 588 TokenType.STAR: lambda self, _: self.expression( 589 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 590 ), 591 TokenType.NULL: lambda self, _: self.expression(exp.Null), 592 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 593 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 594 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 595 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 596 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 597 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 598 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 599 exp.National, this=token.text 600 ), 601 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 602 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 603 exp.RawString, this=token.text 604 ), 605 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 606 } 607 608 PLACEHOLDER_PARSERS = { 609 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 610 TokenType.PARAMETER: lambda self: self._parse_parameter(), 611 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 612 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 613 else None, 614 } 615 616 RANGE_PARSERS = { 617 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 618 TokenType.GLOB: binary_range_parser(exp.Glob), 619 TokenType.ILIKE: binary_range_parser(exp.ILike), 620 TokenType.IN: lambda self, this: self._parse_in(this), 
621 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 622 TokenType.IS: lambda self, this: self._parse_is(this), 623 TokenType.LIKE: binary_range_parser(exp.Like), 624 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 625 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 626 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 627 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 628 } 629 630 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 631 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 632 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 633 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 634 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 635 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 636 "CHECKSUM": lambda self: self._parse_checksum(), 637 "CLUSTER BY": lambda self: self._parse_cluster(), 638 "CLUSTERED": lambda self: self._parse_clustered_by(), 639 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 640 exp.CollateProperty, **kwargs 641 ), 642 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 643 "COPY": lambda self: self._parse_copy_property(), 644 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 645 "DEFINER": lambda self: self._parse_definer(), 646 "DETERMINISTIC": lambda self: self.expression( 647 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 648 ), 649 "DISTKEY": lambda self: self._parse_distkey(), 650 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 651 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 652 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 653 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 654 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Constraint keyword -> parser callable; most build *ColumnConstraint nodes.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> becomes OnUpdateColumnConstraint; a bare ON <id> an OnProperty
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # ALTER TABLE action keyword -> parser callable
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema without a preceding CONSTRAINT <name>
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like keywords that are parsed without a parenthesized argument list
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Function name -> custom parser for functions whose arguments need
    # special (non-CSV) parsing, e.g. CAST(x AS type) or EXTRACT(part FROM expr)
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Token -> (modifier key, parsed node) pairs for query modifiers
    # such as WHERE / GROUP BY / ORDER BY / LIMIT / locking clauses
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        # FETCH is stored under the same "limit" key as LIMIT
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # SET statement scope/kind keyword -> parser callable
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Tokens that can start the SELECT part of a DDL statement (e.g. CREATE TABLE AS ...)
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # Tokens that can precede VOLATILE when it acts as a table property (see _parse_volatile_property)
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # Whether CAST errors on failure (as opposed to TRY_CAST-style NULL semantics)
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Per-instance parsing state; (re)initialized by reset() and updated by _advance()
    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Clear all per-parse state so this instance can parse a new SQL string."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the failure with the type we were trying, then try the next one
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Run `parse_method` over each semicolon-delimited statement in `raw_tokens`."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # Split the token stream on semicolons, one chunk per statement;
        # a trailing semicolon does not open a new (empty) chunk
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # The offending segment is underlined with ANSI escape codes
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach the explicitly passed comments, or else any comments buffered
        # from the previously consumed token
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfer comments buffered by _advance() (from the previous token) onto `expression`
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Return the slice of the original SQL spanned by the `start` and `end` tokens."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Move the cursor `times` tokens forward (negative moves backwards, see _retreat)
        # and refresh the _curr/_next/_prev token pointers
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Move the cursor back to the absolute token position `index`."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the previous keyword and the rest of the statement in a raw Command
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse COMMENT [IF EXISTS] ON <kind> <object> IS <string>, falling back to a Command."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Parse one TTL entry: an expression optionally followed by an action
            # (DELETE / RECOMPRESS <expr> / TO DISK <string> / TO VOLUME <string>)
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse one statement: registered statement parsers first, then raw commands,
        then a plain expression or SELECT with its query modifiers."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Not a recognized creatable kind: fall back to a raw Command
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence is present
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Merge newly parsed properties into the running `properties` node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect optional modifier keywords preceding the property name
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that were actually present
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Fallback: a generic `key = value` property; retreat if there is no EQ
        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        # Consumes an optional "=" or AS before the property's value
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties into a Properties node, or return None if there are none."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # Look two tokens back: if VOLATILE directly follows CREATE/REPLACE/UNIQUE it is
        # a table property, otherwise it is the VOLATILE stability marker
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse DEFINER = user@host; returns None if either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        # `on` stays None when neither ON nor OFF is present
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (cols) [SORTED BY (ordered cols)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
sorted_by=sorted_by, 1607 buckets=buckets, 1608 ) 1609 1610 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1611 if not self._match_text_seq("GRANTS"): 1612 self._retreat(self._index - 1) 1613 return None 1614 1615 return self.expression(exp.CopyGrantsProperty) 1616 1617 def _parse_freespace(self) -> exp.FreespaceProperty: 1618 self._match(TokenType.EQ) 1619 return self.expression( 1620 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1621 ) 1622 1623 def _parse_mergeblockratio( 1624 self, no: bool = False, default: bool = False 1625 ) -> exp.MergeBlockRatioProperty: 1626 if self._match(TokenType.EQ): 1627 return self.expression( 1628 exp.MergeBlockRatioProperty, 1629 this=self._parse_number(), 1630 percent=self._match(TokenType.PERCENT), 1631 ) 1632 1633 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1634 1635 def _parse_datablocksize( 1636 self, 1637 default: t.Optional[bool] = None, 1638 minimum: t.Optional[bool] = None, 1639 maximum: t.Optional[bool] = None, 1640 ) -> exp.DataBlocksizeProperty: 1641 self._match(TokenType.EQ) 1642 size = self._parse_number() 1643 1644 units = None 1645 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1646 units = self._prev.text 1647 1648 return self.expression( 1649 exp.DataBlocksizeProperty, 1650 size=size, 1651 units=units, 1652 default=default, 1653 minimum=minimum, 1654 maximum=maximum, 1655 ) 1656 1657 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1658 self._match(TokenType.EQ) 1659 always = self._match_text_seq("ALWAYS") 1660 manual = self._match_text_seq("MANUAL") 1661 never = self._match_text_seq("NEVER") 1662 default = self._match_text_seq("DEFAULT") 1663 1664 autotemp = None 1665 if self._match_text_seq("AUTOTEMP"): 1666 autotemp = self._parse_schema() 1667 1668 return self.expression( 1669 exp.BlockCompressionProperty, 1670 always=always, 1671 manual=manual, 1672 never=never, 1673 default=default, 
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse `WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]`."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: object kind, optional target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW) carry a table-parts target.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse `PARTITION BY <exprs>`; returns an empty list when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a PostgreSQL partition bound: IN (...), FROM ... TO ..., or WITH (MODULUS, REMAINDER)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords inside FROM/TO bound lists.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse `PARTITION OF <table> { DEFAULT | FOR VALUES <bound> }`."""
        if not self._match_text_seq("OF"):
            # Backtrack over the already-consumed PARTITION token.
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this,
            expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse `PARTITIONED BY [=] (schema | bracketed field)`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse `WITH [NO] DATA [AND [NO] STATISTICS]`; statistics stays None when absent."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """Parse the clause after NO; currently only `NO PRIMARY INDEX`."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse the clause after ON: COMMIT PRESERVE/DELETE ROWS, else a generic ON property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse `DISTKEY (<id>)`."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse `LIKE <table> [INCLUDING|EXCLUDING <option>]...`; None on a malformed option."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse `[COMPOUND] SORTKEY (<ids>)`."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse `CHARACTER SET [=] <value>`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse `REMOTE WITH CONNECTION <table parts>` (BigQuery remote models)."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: a plain type, `TABLE (...)`, or `TABLE <...>`."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # Angle-bracketed struct form: RETURNS TABLE <col type, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse `DESCRIBE [<creatable kind>] <table> [<properties>]`."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement body (after the INSERT token)."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' ...
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. sqlite INSERT OR REPLACE/IGNORE/...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse `KILL [CONNECTION|QUERY] <id>`."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse `ON CONFLICT ...` (postgres) or `ON DUPLICATE KEY ...` (mysql)."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse `RETURNING <exprs> [INTO <target>]`."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the clause after ROW, i.e. `ROW FORMAT ...`."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive ROW FORMAT clause: SERDE '<name>' or DELIMITED with terminators."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return
        self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive `LOAD DATA [LOCAL] INPATH ... INTO TABLE ...`; otherwise fall back to a raw command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement body (after the DELETE token)."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after WHERE.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement body (after the UPDATE token)."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse `UNCACHE TABLE [IF EXISTS] <table>`."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse Spark `CACHE [LAZY] TABLE <t> [OPTIONS ('k' = 'v')] [AS <select>]`."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse `PARTITION (<exprs>)`."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row: a parenthesized tuple or a single bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: CTE-prefixed statement, SELECT, parenthesized
        subquery/pivot, VALUES, or a bare leading FROM (duckdb)."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                # NOTE: reachable when raise_error only records the error
                # (lenient error levels) — TODO confirm.
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # BigQuery: SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse `WITH [RECURSIVE] <cte> [, <cte>]...`; tolerates a repeated WITH between CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: `<alias> AS (<statement>)`."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse `[AS] <alias> [(col, ...)]`; returns None if neither alias nor columns found."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Backtrack if the parens did not actually contain a column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in exp.Subquery, attaching pivots and an optional alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and trailing modifiers (WHERE, GROUP BY, LIMIT, ...) to `this`."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT x, y style packs the offset inside the limit; hoist it out.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment `/*+ ... */` into exp.Hint."""
        if self._match(TokenType.HINT):
            hints = []
            # Keep collecting comma-separated hint functions until none are left.
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse `INTO [TEMPORARY|UNLOGGED] [TABLE] <table>`."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause into exp.From."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE clause (row pattern recognition)."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if
 self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan the raw pattern tokens, balancing parentheses, and keep it as a var.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            # NOTE(review): if the loop body never runs (e.g. raise_error above only
            # recorded the error under a lenient error level), `end` is unbound here
            # — TODO confirm intended behavior.
            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL, OUTER APPLY or CROSS APPLY into exp.Lateral."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: a lateral over an unnest, a function call, or a dotted name.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Match the optional (method, side, kind) token triple that prefixes JOIN."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join clause (comma join, [method/side/kind] JOIN, or OUTER/CROSS APPLY)."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword followed — undo the speculative prefix match.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER,
TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY behaves like a LEFT join for downstream generation.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Support nested joins like `a JOIN b JOIN c ON ...` by trying a child join
            # whose ON/USING belongs to the outer one; backtrack if that fails.
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an index column expression with an optional postgres operator class."""
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        opclass = self._parse_var(any_token=True)
        if opclass:
            return self.expression(exp.Opclass, this=this, expression=opclass)

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; `index` is pre-parsed when coming from CREATE INDEX."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table hints: T-SQL `WITH (...)` or MySQL USE/IGNORE/FORCE INDEX hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted name component: function, identifier, string, or placeholder."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or
self._parse_id_var(any_token=False) 2647 or self._parse_string_as_identifier() 2648 or self._parse_placeholder() 2649 ) 2650 2651 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2652 catalog = None 2653 db = None 2654 table = self._parse_table_part(schema=schema) 2655 2656 while self._match(TokenType.DOT): 2657 if catalog: 2658 # This allows nesting the table in arbitrarily many dot expressions if needed 2659 table = self.expression( 2660 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2661 ) 2662 else: 2663 catalog = db 2664 db = table 2665 table = self._parse_table_part(schema=schema) 2666 2667 if not table: 2668 self.raise_error(f"Expected table name but got {self._curr}") 2669 2670 return self.expression( 2671 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2672 ) 2673 2674 def _parse_table( 2675 self, 2676 schema: bool = False, 2677 joins: bool = False, 2678 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2679 parse_bracket: bool = False, 2680 ) -> t.Optional[exp.Expression]: 2681 lateral = self._parse_lateral() 2682 if lateral: 2683 return lateral 2684 2685 unnest = self._parse_unnest() 2686 if unnest: 2687 return unnest 2688 2689 values = self._parse_derived_table_values() 2690 if values: 2691 return values 2692 2693 subquery = self._parse_select(table=True) 2694 if subquery: 2695 if not subquery.args.get("pivots"): 2696 subquery.set("pivots", self._parse_pivots()) 2697 return subquery 2698 2699 bracket = parse_bracket and self._parse_bracket(None) 2700 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2701 this = t.cast( 2702 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2703 ) 2704 2705 if schema: 2706 return self._parse_schema(this=this) 2707 2708 version = self._parse_version() 2709 2710 if version: 2711 this.set("version", version) 2712 2713 if self.ALIAS_POST_TABLESAMPLE: 2714 table_sample = 
self._parse_table_sample() 2715 2716 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2717 if alias: 2718 this.set("alias", alias) 2719 2720 if self._match_text_seq("AT"): 2721 this.set("index", self._parse_id_var()) 2722 2723 this.set("hints", self._parse_table_hints()) 2724 2725 if not this.args.get("pivots"): 2726 this.set("pivots", self._parse_pivots()) 2727 2728 if not self.ALIAS_POST_TABLESAMPLE: 2729 table_sample = self._parse_table_sample() 2730 2731 if table_sample: 2732 table_sample.set("this", this) 2733 this = table_sample 2734 2735 if joins: 2736 for join in iter(self._parse_join, None): 2737 this.append("joins", join) 2738 2739 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2740 this.set("ordinality", True) 2741 this.set("alias", self._parse_table_alias()) 2742 2743 return this 2744 2745 def _parse_version(self) -> t.Optional[exp.Version]: 2746 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2747 this = "TIMESTAMP" 2748 elif self._match(TokenType.VERSION_SNAPSHOT): 2749 this = "VERSION" 2750 else: 2751 return None 2752 2753 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2754 kind = self._prev.text.upper() 2755 start = self._parse_bitwise() 2756 self._match_texts(("TO", "AND")) 2757 end = self._parse_bitwise() 2758 expression: t.Optional[exp.Expression] = self.expression( 2759 exp.Tuple, expressions=[start, end] 2760 ) 2761 elif self._match_text_seq("CONTAINED", "IN"): 2762 kind = "CONTAINED IN" 2763 expression = self.expression( 2764 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2765 ) 2766 elif self._match(TokenType.ALL): 2767 kind = "ALL" 2768 expression = None 2769 else: 2770 self._match_text_seq("AS", "OF") 2771 kind = "AS OF" 2772 expression = self._parse_type() 2773 2774 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2775 2776 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2777 if not 
self._match(TokenType.UNNEST): 2778 return None 2779 2780 expressions = self._parse_wrapped_csv(self._parse_type) 2781 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2782 2783 alias = self._parse_table_alias() if with_alias else None 2784 2785 if alias: 2786 if self.UNNEST_COLUMN_ONLY: 2787 if alias.args.get("columns"): 2788 self.raise_error("Unexpected extra column alias in unnest.") 2789 2790 alias.set("columns", [alias.this]) 2791 alias.set("this", None) 2792 2793 columns = alias.args.get("columns") or [] 2794 if offset and len(expressions) < len(columns): 2795 offset = columns.pop() 2796 2797 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2798 self._match(TokenType.ALIAS) 2799 offset = self._parse_id_var( 2800 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2801 ) or exp.to_identifier("offset") 2802 2803 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2804 2805 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2806 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2807 if not is_derived and not self._match(TokenType.VALUES): 2808 return None 2809 2810 expressions = self._parse_csv(self._parse_value) 2811 alias = self._parse_table_alias() 2812 2813 if is_derived: 2814 self._match_r_paren() 2815 2816 return self.expression( 2817 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2818 ) 2819 2820 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2821 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2822 as_modifier and self._match_text_seq("USING", "SAMPLE") 2823 ): 2824 return None 2825 2826 bucket_numerator = None 2827 bucket_denominator = None 2828 bucket_field = None 2829 percent = None 2830 rows = None 2831 size = None 2832 seed = None 2833 2834 kind = ( 2835 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2836 ) 2837 method = 
self._parse_var(tokens=(TokenType.ROW,)) 2838 2839 matched_l_paren = self._match(TokenType.L_PAREN) 2840 2841 if self.TABLESAMPLE_CSV: 2842 num = None 2843 expressions = self._parse_csv(self._parse_primary) 2844 else: 2845 expressions = None 2846 num = ( 2847 self._parse_factor() 2848 if self._match(TokenType.NUMBER, advance=False) 2849 else self._parse_primary() 2850 ) 2851 2852 if self._match_text_seq("BUCKET"): 2853 bucket_numerator = self._parse_number() 2854 self._match_text_seq("OUT", "OF") 2855 bucket_denominator = bucket_denominator = self._parse_number() 2856 self._match(TokenType.ON) 2857 bucket_field = self._parse_field() 2858 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2859 percent = num 2860 elif self._match(TokenType.ROWS): 2861 rows = num 2862 elif num: 2863 size = num 2864 2865 if matched_l_paren: 2866 self._match_r_paren() 2867 2868 if self._match(TokenType.L_PAREN): 2869 method = self._parse_var() 2870 seed = self._match(TokenType.COMMA) and self._parse_number() 2871 self._match_r_paren() 2872 elif self._match_texts(("SEED", "REPEATABLE")): 2873 seed = self._parse_wrapped(self._parse_number) 2874 2875 return self.expression( 2876 exp.TableSample, 2877 expressions=expressions, 2878 method=method, 2879 bucket_numerator=bucket_numerator, 2880 bucket_denominator=bucket_denominator, 2881 bucket_field=bucket_field, 2882 percent=percent, 2883 rows=rows, 2884 size=size, 2885 seed=seed, 2886 kind=kind, 2887 ) 2888 2889 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2890 return list(iter(self._parse_pivot, None)) or None 2891 2892 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2893 return list(iter(self._parse_join, None)) or None 2894 2895 # https://duckdb.org/docs/sql/statements/pivot 2896 def _parse_simplified_pivot(self) -> exp.Pivot: 2897 def _parse_on() -> t.Optional[exp.Expression]: 2898 this = self._parse_bitwise() 2899 return self._parse_in(this) if self._match(TokenType.IN) else this 2900 2901 this = 
self._parse_table() 2902 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2903 using = self._match(TokenType.USING) and self._parse_csv( 2904 lambda: self._parse_alias(self._parse_function()) 2905 ) 2906 group = self._parse_group() 2907 return self.expression( 2908 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2909 ) 2910 2911 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2912 index = self._index 2913 include_nulls = None 2914 2915 if self._match(TokenType.PIVOT): 2916 unpivot = False 2917 elif self._match(TokenType.UNPIVOT): 2918 unpivot = True 2919 2920 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2921 if self._match_text_seq("INCLUDE", "NULLS"): 2922 include_nulls = True 2923 elif self._match_text_seq("EXCLUDE", "NULLS"): 2924 include_nulls = False 2925 else: 2926 return None 2927 2928 expressions = [] 2929 field = None 2930 2931 if not self._match(TokenType.L_PAREN): 2932 self._retreat(index) 2933 return None 2934 2935 if unpivot: 2936 expressions = self._parse_csv(self._parse_column) 2937 else: 2938 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2939 2940 if not expressions: 2941 self.raise_error("Failed to parse PIVOT's aggregation list") 2942 2943 if not self._match(TokenType.FOR): 2944 self.raise_error("Expecting FOR") 2945 2946 value = self._parse_column() 2947 2948 if not self._match(TokenType.IN): 2949 self.raise_error("Expecting IN") 2950 2951 field = self._parse_in(value, alias=True) 2952 2953 self._match_r_paren() 2954 2955 pivot = self.expression( 2956 exp.Pivot, 2957 expressions=expressions, 2958 field=field, 2959 unpivot=unpivot, 2960 include_nulls=include_nulls, 2961 ) 2962 2963 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2964 pivot.set("alias", self._parse_table_alias()) 2965 2966 if not unpivot: 2967 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 
            # Synthesize the pivot's output column names: one per combination of
            # aggregation name and IN-field value. PREFIXED_PIVOT_COLUMNS and
            # IDENTIFY_PIVOT_STRINGS are dialect-level settings.
            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # Default strategy: each aggregation's alias names its pivot column.
        # Dialects may override this hook.
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; returns None when the WHERE keyword is absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, accumulating expressions, GROUPING SETS, ROLLUP, CUBE and TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        # Loop so that e.g. `GROUP BY a, ROLLUP(b), c` keeps accumulating elements.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # `WITH ROLLUP` has no column list; plain `ROLLUP (...)` does.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # A bare WITH matched nothing we recognize — rewind it.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...); None when the keyword is absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # A grouping set is either a parenthesized column tuple or a single column.
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; None when the keyword is absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause; None when the keyword is absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse hierarchical-query START WITH / CONNECT BY, in either order."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # PRIOR is only a function inside CONNECT BY, so register it temporarily.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; returns ``this`` unchanged when the keyword is absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause (e.g. SORT BY / CLUSTER BY) introduced by ``token``."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        """Parse one ordering term with optional ASC/DESC and NULLS FIRST/LAST."""
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When null ordering is implicit, derive it from the dialect's
        # NULL_ORDERING setting so transpilation is deterministic.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP (with optional `LIMIT offset, count`) or a FETCH clause."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style `LIMIT offset, count`.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET <n> [ROW|ROWS]; returns ``this`` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>, None = default.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains, recursing on the right-hand side."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            comments=self._prev.comments,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        # Top of the expression precedence chain: conjunction, then optional alias.
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ...), ISNULL/NOTNULL and IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            # NOTNULL is represented as NOT (x IS NULL).
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / boolean."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all — rewind to before the IS token.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: UNNEST(...), (subquery|list), or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse `BETWEEN low AND high` (BETWEEN token already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap ``this`` in an ESCAPE '<char>' clause if one follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, canonicalizing to INTERVAL '<n>' <unit> form."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, `??` (coalesce) and `<<`/`>>` shifts, left-associatively."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast-style `<type> <literal>`, or a plain column expression."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' — a typed literal, possibly dialect-special.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with no literal following — treat it as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parameter inside a type's parentheses, e.g. VARCHAR(10 CHAR)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested/struct/enum params, timezone
        variants, INTERVAL spans, UNSIGNED and trailing [] array suffixes.

        With ``check_func``, ambiguous `TYPE(...)` forms are only treated as
        types when followed by a string literal; otherwise the parser rewinds.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier to see if it is actually a type name.
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # `TYPE(...)` could still be a function call — decided below.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax, e.g. ARRAY<INT> or STRUCT<a: INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Only accept `TYPE(...)` as a type when a string literal follows.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing `[]` suffixes wrap the type in ARRAY, one level per pair.
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one struct field: `name: TYPE` or `name TYPE` (colon optional)."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap ``this`` in AT TIME ZONE <zone> if the clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference (possibly bracketed/dotted via column ops)."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        # Repeatedly apply postfix column operators: `::type` casts, dotted
        # member access, dialect-specific operators, and bracket subscripts.
        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers one level: prior column parts become table/db/catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, implicit string concat,
        a `.N` number, or a parenthesized expression/subquery/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate (SQL standard behavior).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # `.5` is the literal 0.5.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: primary literal, function call, or identifier — in that order."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
optional_parens: bool = True, 3713 ) -> t.Optional[exp.Expression]: 3714 if not self._curr: 3715 return None 3716 3717 token_type = self._curr.token_type 3718 this = self._curr.text 3719 upper = this.upper() 3720 3721 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3722 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3723 self._advance() 3724 return parser(self) 3725 3726 if not self._next or self._next.token_type != TokenType.L_PAREN: 3727 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3728 self._advance() 3729 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3730 3731 return None 3732 3733 if token_type not in self.FUNC_TOKENS: 3734 return None 3735 3736 self._advance(2) 3737 3738 parser = self.FUNCTION_PARSERS.get(upper) 3739 if parser and not anonymous: 3740 this = parser(self) 3741 else: 3742 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3743 3744 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3745 this = self.expression(subquery_predicate, this=self._parse_select()) 3746 self._match_r_paren() 3747 return this 3748 3749 if functions is None: 3750 functions = self.FUNCTIONS 3751 3752 function = functions.get(upper) 3753 3754 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3755 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3756 3757 if function and not anonymous: 3758 func = self.validate_expression(function(args), args) 3759 if not self.NORMALIZE_FUNCTIONS: 3760 func.meta["name"] = this 3761 this = func 3762 else: 3763 this = self.expression(exp.Anonymous, this=this, expressions=args) 3764 3765 self._match_r_paren(this) 3766 return self._parse_window(this) 3767 3768 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3769 return self._parse_column_def(self._parse_id_var()) 3770 3771 def _parse_user_defined_function( 3772 self, kind: t.Optional[TokenType] = None 3773 ) -> t.Optional[exp.Expression]: 3774 this 
= self._parse_id_var() 3775 3776 while self._match(TokenType.DOT): 3777 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3778 3779 if not self._match(TokenType.L_PAREN): 3780 return this 3781 3782 expressions = self._parse_csv(self._parse_function_parameter) 3783 self._match_r_paren() 3784 return self.expression( 3785 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3786 ) 3787 3788 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3789 literal = self._parse_primary() 3790 if literal: 3791 return self.expression(exp.Introducer, this=token.text, expression=literal) 3792 3793 return self.expression(exp.Identifier, this=token.text) 3794 3795 def _parse_session_parameter(self) -> exp.SessionParameter: 3796 kind = None 3797 this = self._parse_id_var() or self._parse_primary() 3798 3799 if this and self._match(TokenType.DOT): 3800 kind = this.name 3801 this = self._parse_var() or self._parse_primary() 3802 3803 return self.expression(exp.SessionParameter, this=this, kind=kind) 3804 3805 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3806 index = self._index 3807 3808 if self._match(TokenType.L_PAREN): 3809 expressions = t.cast( 3810 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3811 ) 3812 3813 if not self._match(TokenType.R_PAREN): 3814 self._retreat(index) 3815 else: 3816 expressions = [self._parse_id_var()] 3817 3818 if self._match_set(self.LAMBDAS): 3819 return self.LAMBDAS[self._prev.token_type](self, expressions) 3820 3821 self._retreat(index) 3822 3823 this: t.Optional[exp.Expression] 3824 3825 if self._match(TokenType.DISTINCT): 3826 this = self.expression( 3827 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3828 ) 3829 else: 3830 this = self._parse_select_or_expression(alias=alias) 3831 3832 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3833 3834 def _parse_schema(self, 
this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3835 index = self._index 3836 3837 if not self.errors: 3838 try: 3839 if self._parse_select(nested=True): 3840 return this 3841 except ParseError: 3842 pass 3843 finally: 3844 self.errors.clear() 3845 self._retreat(index) 3846 3847 if not self._match(TokenType.L_PAREN): 3848 return this 3849 3850 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3851 3852 self._match_r_paren() 3853 return self.expression(exp.Schema, this=this, expressions=args) 3854 3855 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3856 return self._parse_column_def(self._parse_field(any_token=True)) 3857 3858 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3859 # column defs are not really columns, they're identifiers 3860 if isinstance(this, exp.Column): 3861 this = this.this 3862 3863 kind = self._parse_types(schema=True) 3864 3865 if self._match_text_seq("FOR", "ORDINALITY"): 3866 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3867 3868 constraints: t.List[exp.Expression] = [] 3869 3870 if not kind and self._match(TokenType.ALIAS): 3871 constraints.append( 3872 self.expression( 3873 exp.ComputedColumnConstraint, 3874 this=self._parse_conjunction(), 3875 persisted=self._match_text_seq("PERSISTED"), 3876 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3877 ) 3878 ) 3879 3880 while True: 3881 constraint = self._parse_column_constraint() 3882 if not constraint: 3883 break 3884 constraints.append(constraint) 3885 3886 if not kind and not constraints: 3887 return this 3888 3889 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3890 3891 def _parse_auto_increment( 3892 self, 3893 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3894 start = None 3895 increment = None 3896 3897 if self._match(TokenType.L_PAREN, advance=False): 3898 args = 
self._parse_wrapped_csv(self._parse_bitwise) 3899 start = seq_get(args, 0) 3900 increment = seq_get(args, 1) 3901 elif self._match_text_seq("START"): 3902 start = self._parse_bitwise() 3903 self._match_text_seq("INCREMENT") 3904 increment = self._parse_bitwise() 3905 3906 if start and increment: 3907 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3908 3909 return exp.AutoIncrementColumnConstraint() 3910 3911 def _parse_compress(self) -> exp.CompressColumnConstraint: 3912 if self._match(TokenType.L_PAREN, advance=False): 3913 return self.expression( 3914 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3915 ) 3916 3917 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3918 3919 def _parse_generated_as_identity( 3920 self, 3921 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint: 3922 if self._match_text_seq("BY", "DEFAULT"): 3923 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3924 this = self.expression( 3925 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3926 ) 3927 else: 3928 self._match_text_seq("ALWAYS") 3929 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3930 3931 self._match(TokenType.ALIAS) 3932 identity = self._match_text_seq("IDENTITY") 3933 3934 if self._match(TokenType.L_PAREN): 3935 if self._match(TokenType.START_WITH): 3936 this.set("start", self._parse_bitwise()) 3937 if self._match_text_seq("INCREMENT", "BY"): 3938 this.set("increment", self._parse_bitwise()) 3939 if self._match_text_seq("MINVALUE"): 3940 this.set("minvalue", self._parse_bitwise()) 3941 if self._match_text_seq("MAXVALUE"): 3942 this.set("maxvalue", self._parse_bitwise()) 3943 3944 if self._match_text_seq("CYCLE"): 3945 this.set("cycle", True) 3946 elif self._match_text_seq("NO", "CYCLE"): 3947 this.set("cycle", False) 3948 3949 if not identity: 3950 this.set("expression", self._parse_bitwise()) 3951 elif not 
this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 3952 args = self._parse_csv(self._parse_bitwise) 3953 this.set("start", seq_get(args, 0)) 3954 this.set("increment", seq_get(args, 1)) 3955 3956 self._match_r_paren() 3957 3958 return this 3959 3960 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3961 self._match_text_seq("LENGTH") 3962 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3963 3964 def _parse_not_constraint( 3965 self, 3966 ) -> t.Optional[exp.Expression]: 3967 if self._match_text_seq("NULL"): 3968 return self.expression(exp.NotNullColumnConstraint) 3969 if self._match_text_seq("CASESPECIFIC"): 3970 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3971 if self._match_text_seq("FOR", "REPLICATION"): 3972 return self.expression(exp.NotForReplicationColumnConstraint) 3973 return None 3974 3975 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3976 if self._match(TokenType.CONSTRAINT): 3977 this = self._parse_id_var() 3978 else: 3979 this = None 3980 3981 if self._match_texts(self.CONSTRAINT_PARSERS): 3982 return self.expression( 3983 exp.ColumnConstraint, 3984 this=this, 3985 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3986 ) 3987 3988 return this 3989 3990 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3991 if not self._match(TokenType.CONSTRAINT): 3992 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3993 3994 this = self._parse_id_var() 3995 expressions = [] 3996 3997 while True: 3998 constraint = self._parse_unnamed_constraint() or self._parse_function() 3999 if not constraint: 4000 break 4001 expressions.append(constraint) 4002 4003 return self.expression(exp.Constraint, this=this, expressions=expressions) 4004 4005 def _parse_unnamed_constraint( 4006 self, constraints: t.Optional[t.Collection[str]] = None 4007 ) -> t.Optional[exp.Expression]: 4008 if self._match(TokenType.IDENTIFIER, 
advance=False) or not self._match_texts( 4009 constraints or self.CONSTRAINT_PARSERS 4010 ): 4011 return None 4012 4013 constraint = self._prev.text.upper() 4014 if constraint not in self.CONSTRAINT_PARSERS: 4015 self.raise_error(f"No parser found for schema constraint {constraint}.") 4016 4017 return self.CONSTRAINT_PARSERS[constraint](self) 4018 4019 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4020 self._match_text_seq("KEY") 4021 return self.expression( 4022 exp.UniqueColumnConstraint, 4023 this=self._parse_schema(self._parse_id_var(any_token=False)), 4024 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4025 ) 4026 4027 def _parse_key_constraint_options(self) -> t.List[str]: 4028 options = [] 4029 while True: 4030 if not self._curr: 4031 break 4032 4033 if self._match(TokenType.ON): 4034 action = None 4035 on = self._advance_any() and self._prev.text 4036 4037 if self._match_text_seq("NO", "ACTION"): 4038 action = "NO ACTION" 4039 elif self._match_text_seq("CASCADE"): 4040 action = "CASCADE" 4041 elif self._match_text_seq("RESTRICT"): 4042 action = "RESTRICT" 4043 elif self._match_pair(TokenType.SET, TokenType.NULL): 4044 action = "SET NULL" 4045 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4046 action = "SET DEFAULT" 4047 else: 4048 self.raise_error("Invalid key constraint") 4049 4050 options.append(f"ON {on} {action}") 4051 elif self._match_text_seq("NOT", "ENFORCED"): 4052 options.append("NOT ENFORCED") 4053 elif self._match_text_seq("DEFERRABLE"): 4054 options.append("DEFERRABLE") 4055 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4056 options.append("INITIALLY DEFERRED") 4057 elif self._match_text_seq("NORELY"): 4058 options.append("NORELY") 4059 elif self._match_text_seq("MATCH", "FULL"): 4060 options.append("MATCH FULL") 4061 else: 4062 break 4063 4064 return options 4065 4066 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4067 if match and not 
self._match(TokenType.REFERENCES): 4068 return None 4069 4070 expressions = None 4071 this = self._parse_table(schema=True) 4072 options = self._parse_key_constraint_options() 4073 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4074 4075 def _parse_foreign_key(self) -> exp.ForeignKey: 4076 expressions = self._parse_wrapped_id_vars() 4077 reference = self._parse_references() 4078 options = {} 4079 4080 while self._match(TokenType.ON): 4081 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4082 self.raise_error("Expected DELETE or UPDATE") 4083 4084 kind = self._prev.text.lower() 4085 4086 if self._match_text_seq("NO", "ACTION"): 4087 action = "NO ACTION" 4088 elif self._match(TokenType.SET): 4089 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4090 action = "SET " + self._prev.text.upper() 4091 else: 4092 self._advance() 4093 action = self._prev.text.upper() 4094 4095 options[kind] = action 4096 4097 return self.expression( 4098 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4099 ) 4100 4101 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4102 return self._parse_field() 4103 4104 def _parse_primary_key( 4105 self, wrapped_optional: bool = False, in_props: bool = False 4106 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4107 desc = ( 4108 self._match_set((TokenType.ASC, TokenType.DESC)) 4109 and self._prev.token_type == TokenType.DESC 4110 ) 4111 4112 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4113 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4114 4115 expressions = self._parse_wrapped_csv( 4116 self._parse_primary_key_part, optional=wrapped_optional 4117 ) 4118 options = self._parse_key_constraint_options() 4119 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4120 4121 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4122 
if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4123 return this 4124 4125 bracket_kind = self._prev.token_type 4126 4127 if self._match(TokenType.COLON): 4128 expressions: t.List[exp.Expression] = [ 4129 self.expression(exp.Slice, expression=self._parse_conjunction()) 4130 ] 4131 else: 4132 expressions = self._parse_csv( 4133 lambda: self._parse_slice( 4134 self._parse_alias(self._parse_conjunction(), explicit=True) 4135 ) 4136 ) 4137 4138 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4139 self.raise_error("Expected ]") 4140 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4141 self.raise_error("Expected }") 4142 4143 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4144 if bracket_kind == TokenType.L_BRACE: 4145 this = self.expression(exp.Struct, expressions=expressions) 4146 elif not this or this.name.upper() == "ARRAY": 4147 this = self.expression(exp.Array, expressions=expressions) 4148 else: 4149 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4150 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4151 4152 self._add_comments(this) 4153 return self._parse_bracket(this) 4154 4155 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4156 if self._match(TokenType.COLON): 4157 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4158 return this 4159 4160 def _parse_case(self) -> t.Optional[exp.Expression]: 4161 ifs = [] 4162 default = None 4163 4164 comments = self._prev_comments 4165 expression = self._parse_conjunction() 4166 4167 while self._match(TokenType.WHEN): 4168 this = self._parse_conjunction() 4169 self._match(TokenType.THEN) 4170 then = self._parse_conjunction() 4171 ifs.append(self.expression(exp.If, this=this, true=then)) 4172 4173 if self._match(TokenType.ELSE): 4174 default = self._parse_conjunction() 4175 4176 if not 
self._match(TokenType.END): 4177 self.raise_error("Expected END after CASE", self._prev) 4178 4179 return self._parse_window( 4180 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4181 ) 4182 4183 def _parse_if(self) -> t.Optional[exp.Expression]: 4184 if self._match(TokenType.L_PAREN): 4185 args = self._parse_csv(self._parse_conjunction) 4186 this = self.validate_expression(exp.If.from_arg_list(args), args) 4187 self._match_r_paren() 4188 else: 4189 index = self._index - 1 4190 condition = self._parse_conjunction() 4191 4192 if not condition: 4193 self._retreat(index) 4194 return None 4195 4196 self._match(TokenType.THEN) 4197 true = self._parse_conjunction() 4198 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4199 self._match(TokenType.END) 4200 this = self.expression(exp.If, this=condition, true=true, false=false) 4201 4202 return self._parse_window(this) 4203 4204 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4205 if not self._match_text_seq("VALUE", "FOR"): 4206 self._retreat(self._index - 1) 4207 return None 4208 4209 return self.expression( 4210 exp.NextValueFor, 4211 this=self._parse_column(), 4212 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4213 ) 4214 4215 def _parse_extract(self) -> exp.Extract: 4216 this = self._parse_function() or self._parse_var() or self._parse_type() 4217 4218 if self._match(TokenType.FROM): 4219 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4220 4221 if not self._match(TokenType.COMMA): 4222 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4223 4224 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4225 4226 def _parse_any_value(self) -> exp.AnyValue: 4227 this = self._parse_lambda() 4228 is_max = None 4229 having = None 4230 4231 if self._match(TokenType.HAVING): 4232 self._match_texts(("MAX", "MIN")) 4233 is_max = self._prev.text 
== "MAX" 4234 having = self._parse_column() 4235 4236 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4237 4238 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4239 this = self._parse_conjunction() 4240 4241 if not self._match(TokenType.ALIAS): 4242 if self._match(TokenType.COMMA): 4243 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4244 4245 self.raise_error("Expected AS after CAST") 4246 4247 fmt = None 4248 to = self._parse_types() 4249 4250 if self._match(TokenType.FORMAT): 4251 fmt_string = self._parse_string() 4252 fmt = self._parse_at_time_zone(fmt_string) 4253 4254 if not to: 4255 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4256 if to.this in exp.DataType.TEMPORAL_TYPES: 4257 this = self.expression( 4258 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4259 this=this, 4260 format=exp.Literal.string( 4261 format_time( 4262 fmt_string.this if fmt_string else "", 4263 self.FORMAT_MAPPING or self.TIME_MAPPING, 4264 self.FORMAT_TRIE or self.TIME_TRIE, 4265 ) 4266 ), 4267 ) 4268 4269 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4270 this.set("zone", fmt.args["zone"]) 4271 return this 4272 elif not to: 4273 self.raise_error("Expected TYPE after CAST") 4274 elif isinstance(to, exp.Identifier): 4275 to = exp.DataType.build(to.name, udt=True) 4276 elif to.this == exp.DataType.Type.CHAR: 4277 if self._match(TokenType.CHARACTER_SET): 4278 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4279 4280 return self.expression( 4281 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4282 ) 4283 4284 def _parse_concat(self) -> t.Optional[exp.Expression]: 4285 args = self._parse_csv(self._parse_conjunction) 4286 if self.CONCAT_NULL_OUTPUTS_STRING: 4287 args = self._ensure_string_if_null(args) 4288 4289 # Some dialects (e.g. 
Trino) don't allow a single-argument CONCAT call, so when 4290 # we find such a call we replace it with its argument. 4291 if len(args) == 1: 4292 return args[0] 4293 4294 return self.expression( 4295 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4296 ) 4297 4298 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4299 args = self._parse_csv(self._parse_conjunction) 4300 if len(args) < 2: 4301 return self.expression(exp.ConcatWs, expressions=args) 4302 delim, *values = args 4303 if self.CONCAT_NULL_OUTPUTS_STRING: 4304 values = self._ensure_string_if_null(values) 4305 4306 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4307 4308 def _parse_string_agg(self) -> exp.Expression: 4309 if self._match(TokenType.DISTINCT): 4310 args: t.List[t.Optional[exp.Expression]] = [ 4311 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4312 ] 4313 if self._match(TokenType.COMMA): 4314 args.extend(self._parse_csv(self._parse_conjunction)) 4315 else: 4316 args = self._parse_csv(self._parse_conjunction) # type: ignore 4317 4318 index = self._index 4319 if not self._match(TokenType.R_PAREN) and args: 4320 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4321 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4322 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4323 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4324 4325 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4326 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4327 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
4328 if not self._match_text_seq("WITHIN", "GROUP"): 4329 self._retreat(index) 4330 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4331 4332 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4333 order = self._parse_order(this=seq_get(args, 0)) 4334 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4335 4336 def _parse_convert( 4337 self, strict: bool, safe: t.Optional[bool] = None 4338 ) -> t.Optional[exp.Expression]: 4339 this = self._parse_bitwise() 4340 4341 if self._match(TokenType.USING): 4342 to: t.Optional[exp.Expression] = self.expression( 4343 exp.CharacterSet, this=self._parse_var() 4344 ) 4345 elif self._match(TokenType.COMMA): 4346 to = self._parse_types() 4347 else: 4348 to = None 4349 4350 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4351 4352 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4353 """ 4354 There are generally two variants of the DECODE function: 4355 4356 - DECODE(bin, charset) 4357 - DECODE(expression, search, result [, search, result] ... [, default]) 4358 4359 The second variant will always be parsed into a CASE expression. Note that NULL 4360 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4361 instead of relying on pattern matching. 
4362 """ 4363 args = self._parse_csv(self._parse_conjunction) 4364 4365 if len(args) < 3: 4366 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4367 4368 expression, *expressions = args 4369 if not expression: 4370 return None 4371 4372 ifs = [] 4373 for search, result in zip(expressions[::2], expressions[1::2]): 4374 if not search or not result: 4375 return None 4376 4377 if isinstance(search, exp.Literal): 4378 ifs.append( 4379 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4380 ) 4381 elif isinstance(search, exp.Null): 4382 ifs.append( 4383 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4384 ) 4385 else: 4386 cond = exp.or_( 4387 exp.EQ(this=expression.copy(), expression=search), 4388 exp.and_( 4389 exp.Is(this=expression.copy(), expression=exp.Null()), 4390 exp.Is(this=search.copy(), expression=exp.Null()), 4391 copy=False, 4392 ), 4393 copy=False, 4394 ) 4395 ifs.append(exp.If(this=cond, true=result)) 4396 4397 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4398 4399 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4400 self._match_text_seq("KEY") 4401 key = self._parse_column() 4402 self._match_set((TokenType.COLON, TokenType.COMMA)) 4403 self._match_text_seq("VALUE") 4404 value = self._parse_bitwise() 4405 4406 if not key and not value: 4407 return None 4408 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4409 4410 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4411 if not this or not self._match_text_seq("FORMAT", "JSON"): 4412 return this 4413 4414 return self.expression(exp.FormatJson, this=this) 4415 4416 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4417 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4418 for value in values: 4419 if self._match_text_seq(value, "ON", on): 4420 return f"{value} ON {on}" 4421 4422 return None 4423 4424 def _parse_json_object(self) -> exp.JSONObject: 4425 star = self._parse_star() 4426 expressions = ( 4427 [star] 4428 if star 4429 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4430 ) 4431 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4432 4433 unique_keys = None 4434 if self._match_text_seq("WITH", "UNIQUE"): 4435 unique_keys = True 4436 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4437 unique_keys = False 4438 4439 self._match_text_seq("KEYS") 4440 4441 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4442 self._parse_type() 4443 ) 4444 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4445 4446 return self.expression( 4447 exp.JSONObject, 4448 expressions=expressions, 4449 null_handling=null_handling, 4450 unique_keys=unique_keys, 4451 return_type=return_type, 4452 encoding=encoding, 4453 ) 4454 4455 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4456 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4457 if not self._match_text_seq("NESTED"): 4458 this = self._parse_id_var() 4459 kind = self._parse_types(allow_identifiers=False) 4460 nested = None 4461 else: 4462 this = None 4463 kind = None 4464 nested = True 4465 4466 path = self._match_text_seq("PATH") and self._parse_string() 4467 nested_schema = nested and self._parse_json_schema() 4468 4469 return self.expression( 4470 exp.JSONColumnDef, 4471 this=this, 4472 kind=kind, 4473 path=path, 4474 nested_schema=nested_schema, 4475 ) 4476 4477 def _parse_json_schema(self) -> exp.JSONSchema: 4478 self._match_text_seq("COLUMNS") 4479 return self.expression( 4480 exp.JSONSchema, 4481 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4482 ) 4483 4484 def _parse_json_table(self) -> 
exp.JSONTable: 4485 this = self._parse_format_json(self._parse_bitwise()) 4486 path = self._match(TokenType.COMMA) and self._parse_string() 4487 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4488 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4489 schema = self._parse_json_schema() 4490 4491 return exp.JSONTable( 4492 this=this, 4493 schema=schema, 4494 path=path, 4495 error_handling=error_handling, 4496 empty_handling=empty_handling, 4497 ) 4498 4499 def _parse_logarithm(self) -> exp.Func: 4500 # Default argument order is base, expression 4501 args = self._parse_csv(self._parse_range) 4502 4503 if len(args) > 1: 4504 if not self.LOG_BASE_FIRST: 4505 args.reverse() 4506 return exp.Log.from_arg_list(args) 4507 4508 return self.expression( 4509 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4510 ) 4511 4512 def _parse_match_against(self) -> exp.MatchAgainst: 4513 expressions = self._parse_csv(self._parse_column) 4514 4515 self._match_text_seq(")", "AGAINST", "(") 4516 4517 this = self._parse_string() 4518 4519 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4520 modifier = "IN NATURAL LANGUAGE MODE" 4521 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4522 modifier = f"{modifier} WITH QUERY EXPANSION" 4523 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4524 modifier = "IN BOOLEAN MODE" 4525 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4526 modifier = "WITH QUERY EXPANSION" 4527 else: 4528 modifier = None 4529 4530 return self.expression( 4531 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4532 ) 4533 4534 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4535 def _parse_open_json(self) -> exp.OpenJSON: 4536 this = self._parse_bitwise() 4537 path = self._match(TokenType.COMMA) and self._parse_string() 4538 4539 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4540 this = 
self._parse_field(any_token=True) 4541 kind = self._parse_types() 4542 path = self._parse_string() 4543 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4544 4545 return self.expression( 4546 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4547 ) 4548 4549 expressions = None 4550 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4551 self._match_l_paren() 4552 expressions = self._parse_csv(_parse_open_json_column_def) 4553 4554 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4555 4556 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4557 args = self._parse_csv(self._parse_bitwise) 4558 4559 if self._match(TokenType.IN): 4560 return self.expression( 4561 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4562 ) 4563 4564 if haystack_first: 4565 haystack = seq_get(args, 0) 4566 needle = seq_get(args, 1) 4567 else: 4568 needle = seq_get(args, 0) 4569 haystack = seq_get(args, 1) 4570 4571 return self.expression( 4572 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4573 ) 4574 4575 def _parse_predict(self) -> exp.Predict: 4576 self._match_text_seq("MODEL") 4577 this = self._parse_table() 4578 4579 self._match(TokenType.COMMA) 4580 self._match_text_seq("TABLE") 4581 4582 return self.expression( 4583 exp.Predict, 4584 this=this, 4585 expression=self._parse_table(), 4586 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4587 ) 4588 4589 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4590 args = self._parse_csv(self._parse_table) 4591 return exp.JoinHint(this=func_name.upper(), expressions=args) 4592 4593 def _parse_substring(self) -> exp.Substring: 4594 # Postgres supports the form: substring(string [from int] [for int]) 4595 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4596 4597 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 
4598 4599 if self._match(TokenType.FROM): 4600 args.append(self._parse_bitwise()) 4601 if self._match(TokenType.FOR): 4602 args.append(self._parse_bitwise()) 4603 4604 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4605 4606 def _parse_trim(self) -> exp.Trim: 4607 # https://www.w3resource.com/sql/character-functions/trim.php 4608 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4609 4610 position = None 4611 collation = None 4612 expression = None 4613 4614 if self._match_texts(self.TRIM_TYPES): 4615 position = self._prev.text.upper() 4616 4617 this = self._parse_bitwise() 4618 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4619 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4620 expression = self._parse_bitwise() 4621 4622 if invert_order: 4623 this, expression = expression, this 4624 4625 if self._match(TokenType.COLLATE): 4626 collation = self._parse_bitwise() 4627 4628 return self.expression( 4629 exp.Trim, this=this, position=position, expression=expression, collation=collation 4630 ) 4631 4632 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4633 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4634 4635 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4636 return self._parse_window(self._parse_id_var(), alias=True) 4637 4638 def _parse_respect_or_ignore_nulls( 4639 self, this: t.Optional[exp.Expression] 4640 ) -> t.Optional[exp.Expression]: 4641 if self._match_text_seq("IGNORE", "NULLS"): 4642 return self.expression(exp.IgnoreNulls, this=this) 4643 if self._match_text_seq("RESPECT", "NULLS"): 4644 return self.expression(exp.RespectNulls, this=this) 4645 return this 4646 4647 def _parse_window( 4648 self, this: t.Optional[exp.Expression], alias: bool = False 4649 ) -> t.Optional[exp.Expression]: 4650 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4651 self._match(TokenType.WHERE) 4652 this = 
self.expression( 4653 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4654 ) 4655 self._match_r_paren() 4656 4657 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4658 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4659 if self._match_text_seq("WITHIN", "GROUP"): 4660 order = self._parse_wrapped(self._parse_order) 4661 this = self.expression(exp.WithinGroup, this=this, expression=order) 4662 4663 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4664 # Some dialects choose to implement and some do not. 4665 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4666 4667 # There is some code above in _parse_lambda that handles 4668 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4669 4670 # The below changes handle 4671 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4672 4673 # Oracle allows both formats 4674 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4675 # and Snowflake chose to do the same for familiarity 4676 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4677 this = self._parse_respect_or_ignore_nulls(this) 4678 4679 # bigquery select from window x AS (partition by ...) 
4680 if alias: 4681 over = None 4682 self._match(TokenType.ALIAS) 4683 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4684 return this 4685 else: 4686 over = self._prev.text.upper() 4687 4688 if not self._match(TokenType.L_PAREN): 4689 return self.expression( 4690 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4691 ) 4692 4693 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4694 4695 first = self._match(TokenType.FIRST) 4696 if self._match_text_seq("LAST"): 4697 first = False 4698 4699 partition, order = self._parse_partition_and_order() 4700 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4701 4702 if kind: 4703 self._match(TokenType.BETWEEN) 4704 start = self._parse_window_spec() 4705 self._match(TokenType.AND) 4706 end = self._parse_window_spec() 4707 4708 spec = self.expression( 4709 exp.WindowSpec, 4710 kind=kind, 4711 start=start["value"], 4712 start_side=start["side"], 4713 end=end["value"], 4714 end_side=end["side"], 4715 ) 4716 else: 4717 spec = None 4718 4719 self._match_r_paren() 4720 4721 window = self.expression( 4722 exp.Window, 4723 this=this, 4724 partition_by=partition, 4725 order=order, 4726 spec=spec, 4727 alias=window_alias, 4728 over=over, 4729 first=first, 4730 ) 4731 4732 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4733 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4734 return self._parse_window(window, alias=alias) 4735 4736 return window 4737 4738 def _parse_partition_and_order( 4739 self, 4740 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4741 return self._parse_partition_by(), self._parse_order() 4742 4743 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4744 self._match(TokenType.BETWEEN) 4745 4746 return { 4747 "value": ( 4748 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4749 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4750 or self._parse_bitwise() 4751 ), 4752 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4753 } 4754 4755 def _parse_alias( 4756 self, this: t.Optional[exp.Expression], explicit: bool = False 4757 ) -> t.Optional[exp.Expression]: 4758 any_token = self._match(TokenType.ALIAS) 4759 4760 if explicit and not any_token: 4761 return this 4762 4763 if self._match(TokenType.L_PAREN): 4764 aliases = self.expression( 4765 exp.Aliases, 4766 this=this, 4767 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4768 ) 4769 self._match_r_paren(aliases) 4770 return aliases 4771 4772 alias = self._parse_id_var(any_token) 4773 4774 if alias: 4775 return self.expression(exp.Alias, this=this, alias=alias) 4776 4777 return this 4778 4779 def _parse_id_var( 4780 self, 4781 any_token: bool = True, 4782 tokens: t.Optional[t.Collection[TokenType]] = None, 4783 ) -> t.Optional[exp.Expression]: 4784 identifier = self._parse_identifier() 4785 4786 if identifier: 4787 return identifier 4788 4789 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4790 quoted = self._prev.token_type == TokenType.STRING 4791 return exp.Identifier(this=self._prev.text, quoted=quoted) 4792 4793 return None 4794 4795 def _parse_string(self) -> t.Optional[exp.Expression]: 4796 if self._match(TokenType.STRING): 4797 return 
self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4798 return self._parse_placeholder() 4799 4800 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4801 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4802 4803 def _parse_number(self) -> t.Optional[exp.Expression]: 4804 if self._match(TokenType.NUMBER): 4805 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4806 return self._parse_placeholder() 4807 4808 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4809 if self._match(TokenType.IDENTIFIER): 4810 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4811 return self._parse_placeholder() 4812 4813 def _parse_var( 4814 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4815 ) -> t.Optional[exp.Expression]: 4816 if ( 4817 (any_token and self._advance_any()) 4818 or self._match(TokenType.VAR) 4819 or (self._match_set(tokens) if tokens else False) 4820 ): 4821 return self.expression(exp.Var, this=self._prev.text) 4822 return self._parse_placeholder() 4823 4824 def _advance_any(self) -> t.Optional[Token]: 4825 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4826 self._advance() 4827 return self._prev 4828 return None 4829 4830 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4831 return self._parse_var() or self._parse_string() 4832 4833 def _parse_null(self) -> t.Optional[exp.Expression]: 4834 if self._match_set(self.NULL_TOKENS): 4835 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4836 return self._parse_placeholder() 4837 4838 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4839 if self._match(TokenType.TRUE): 4840 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4841 if self._match(TokenType.FALSE): 4842 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4843 return self._parse_placeholder() 4844 4845 def _parse_star(self) -> 
t.Optional[exp.Expression]: 4846 if self._match(TokenType.STAR): 4847 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4848 return self._parse_placeholder() 4849 4850 def _parse_parameter(self) -> exp.Parameter: 4851 def _parse_parameter_part() -> t.Optional[exp.Expression]: 4852 return ( 4853 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 4854 ) 4855 4856 self._match(TokenType.L_BRACE) 4857 this = _parse_parameter_part() 4858 expression = self._match(TokenType.COLON) and _parse_parameter_part() 4859 self._match(TokenType.R_BRACE) 4860 4861 return self.expression(exp.Parameter, this=this, expression=expression) 4862 4863 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4864 if self._match_set(self.PLACEHOLDER_PARSERS): 4865 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4866 if placeholder: 4867 return placeholder 4868 self._advance(-1) 4869 return None 4870 4871 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4872 if not self._match(TokenType.EXCEPT): 4873 return None 4874 if self._match(TokenType.L_PAREN, advance=False): 4875 return self._parse_wrapped_csv(self._parse_column) 4876 4877 except_column = self._parse_column() 4878 return [except_column] if except_column else None 4879 4880 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4881 if not self._match(TokenType.REPLACE): 4882 return None 4883 if self._match(TokenType.L_PAREN, advance=False): 4884 return self._parse_wrapped_csv(self._parse_expression) 4885 4886 replace_expression = self._parse_expression() 4887 return [replace_expression] if replace_expression else None 4888 4889 def _parse_csv( 4890 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4891 ) -> t.List[exp.Expression]: 4892 parse_result = parse_method() 4893 items = [parse_result] if parse_result is not None else [] 4894 4895 while self._match(sep): 4896 self._add_comments(parse_result) 4897 parse_result = 
parse_method() 4898 if parse_result is not None: 4899 items.append(parse_result) 4900 4901 return items 4902 4903 def _parse_tokens( 4904 self, parse_method: t.Callable, expressions: t.Dict 4905 ) -> t.Optional[exp.Expression]: 4906 this = parse_method() 4907 4908 while self._match_set(expressions): 4909 this = self.expression( 4910 expressions[self._prev.token_type], 4911 this=this, 4912 comments=self._prev_comments, 4913 expression=parse_method(), 4914 ) 4915 4916 return this 4917 4918 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4919 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4920 4921 def _parse_wrapped_csv( 4922 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4923 ) -> t.List[exp.Expression]: 4924 return self._parse_wrapped( 4925 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4926 ) 4927 4928 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4929 wrapped = self._match(TokenType.L_PAREN) 4930 if not wrapped and not optional: 4931 self.raise_error("Expecting (") 4932 parse_result = parse_method() 4933 if wrapped: 4934 self._match_r_paren() 4935 return parse_result 4936 4937 def _parse_expressions(self) -> t.List[exp.Expression]: 4938 return self._parse_csv(self._parse_expression) 4939 4940 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4941 return self._parse_select() or self._parse_set_operations( 4942 self._parse_expression() if alias else self._parse_conjunction() 4943 ) 4944 4945 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4946 return self._parse_query_modifiers( 4947 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4948 ) 4949 4950 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4951 this = None 4952 if self._match_texts(self.TRANSACTION_KIND): 4953 this = self._prev.text 4954 4955 
self._match_texts({"TRANSACTION", "WORK"}) 4956 4957 modes = [] 4958 while True: 4959 mode = [] 4960 while self._match(TokenType.VAR): 4961 mode.append(self._prev.text) 4962 4963 if mode: 4964 modes.append(" ".join(mode)) 4965 if not self._match(TokenType.COMMA): 4966 break 4967 4968 return self.expression(exp.Transaction, this=this, modes=modes) 4969 4970 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4971 chain = None 4972 savepoint = None 4973 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4974 4975 self._match_texts({"TRANSACTION", "WORK"}) 4976 4977 if self._match_text_seq("TO"): 4978 self._match_text_seq("SAVEPOINT") 4979 savepoint = self._parse_id_var() 4980 4981 if self._match(TokenType.AND): 4982 chain = not self._match_text_seq("NO") 4983 self._match_text_seq("CHAIN") 4984 4985 if is_rollback: 4986 return self.expression(exp.Rollback, savepoint=savepoint) 4987 4988 return self.expression(exp.Commit, chain=chain) 4989 4990 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4991 if not self._match_text_seq("ADD"): 4992 return None 4993 4994 self._match(TokenType.COLUMN) 4995 exists_column = self._parse_exists(not_=True) 4996 expression = self._parse_field_def() 4997 4998 if expression: 4999 expression.set("exists", exists_column) 5000 5001 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5002 if self._match_texts(("FIRST", "AFTER")): 5003 position = self._prev.text 5004 column_position = self.expression( 5005 exp.ColumnPosition, this=self._parse_column(), position=position 5006 ) 5007 expression.set("position", column_position) 5008 5009 return expression 5010 5011 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5012 drop = self._match(TokenType.DROP) and self._parse_drop() 5013 if drop and not isinstance(drop, exp.Command): 5014 drop.set("kind", drop.args.get("kind", "COLUMN")) 5015 return drop 5016 5017 # 
https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5018 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5019 return self.expression( 5020 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5021 ) 5022 5023 def _parse_add_constraint(self) -> exp.AddConstraint: 5024 this = None 5025 kind = self._prev.token_type 5026 5027 if kind == TokenType.CONSTRAINT: 5028 this = self._parse_id_var() 5029 5030 if self._match_text_seq("CHECK"): 5031 expression = self._parse_wrapped(self._parse_conjunction) 5032 enforced = self._match_text_seq("ENFORCED") 5033 5034 return self.expression( 5035 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5036 ) 5037 5038 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5039 expression = self._parse_foreign_key() 5040 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5041 expression = self._parse_primary_key() 5042 else: 5043 expression = None 5044 5045 return self.expression(exp.AddConstraint, this=this, expression=expression) 5046 5047 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5048 index = self._index - 1 5049 5050 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5051 return self._parse_csv(self._parse_add_constraint) 5052 5053 self._retreat(index) 5054 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 5055 return self._parse_csv(self._parse_field_def) 5056 5057 return self._parse_csv(self._parse_add_column) 5058 5059 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5060 self._match(TokenType.COLUMN) 5061 column = self._parse_field(any_token=True) 5062 5063 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5064 return self.expression(exp.AlterColumn, this=column, drop=True) 5065 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5066 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 
5067 5068 self._match_text_seq("SET", "DATA") 5069 return self.expression( 5070 exp.AlterColumn, 5071 this=column, 5072 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5073 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5074 using=self._match(TokenType.USING) and self._parse_conjunction(), 5075 ) 5076 5077 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5078 index = self._index - 1 5079 5080 partition_exists = self._parse_exists() 5081 if self._match(TokenType.PARTITION, advance=False): 5082 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5083 5084 self._retreat(index) 5085 return self._parse_csv(self._parse_drop_column) 5086 5087 def _parse_alter_table_rename(self) -> exp.RenameTable: 5088 self._match_text_seq("TO") 5089 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5090 5091 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5092 start = self._prev 5093 5094 if not self._match(TokenType.TABLE): 5095 return self._parse_as_command(start) 5096 5097 exists = self._parse_exists() 5098 only = self._match_text_seq("ONLY") 5099 this = self._parse_table(schema=True) 5100 5101 if self._next: 5102 self._advance() 5103 5104 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5105 if parser: 5106 actions = ensure_list(parser(self)) 5107 5108 if not self._curr: 5109 return self.expression( 5110 exp.AlterTable, 5111 this=this, 5112 exists=exists, 5113 actions=actions, 5114 only=only, 5115 ) 5116 5117 return self._parse_as_command(start) 5118 5119 def _parse_merge(self) -> exp.Merge: 5120 self._match(TokenType.INTO) 5121 target = self._parse_table() 5122 5123 if target and self._match(TokenType.ALIAS, advance=False): 5124 target.set("alias", self._parse_table_alias()) 5125 5126 self._match(TokenType.USING) 5127 using = self._parse_table() 5128 5129 self._match(TokenType.ON) 5130 on = self._parse_conjunction() 5131 5132 return 
self.expression( 5133 exp.Merge, 5134 this=target, 5135 using=using, 5136 on=on, 5137 expressions=self._parse_when_matched(), 5138 ) 5139 5140 def _parse_when_matched(self) -> t.List[exp.When]: 5141 whens = [] 5142 5143 while self._match(TokenType.WHEN): 5144 matched = not self._match(TokenType.NOT) 5145 self._match_text_seq("MATCHED") 5146 source = ( 5147 False 5148 if self._match_text_seq("BY", "TARGET") 5149 else self._match_text_seq("BY", "SOURCE") 5150 ) 5151 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5152 5153 self._match(TokenType.THEN) 5154 5155 if self._match(TokenType.INSERT): 5156 _this = self._parse_star() 5157 if _this: 5158 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5159 else: 5160 then = self.expression( 5161 exp.Insert, 5162 this=self._parse_value(), 5163 expression=self._match(TokenType.VALUES) and self._parse_value(), 5164 ) 5165 elif self._match(TokenType.UPDATE): 5166 expressions = self._parse_star() 5167 if expressions: 5168 then = self.expression(exp.Update, expressions=expressions) 5169 else: 5170 then = self.expression( 5171 exp.Update, 5172 expressions=self._match(TokenType.SET) 5173 and self._parse_csv(self._parse_equality), 5174 ) 5175 elif self._match(TokenType.DELETE): 5176 then = self.expression(exp.Var, this=self._prev.text) 5177 else: 5178 then = None 5179 5180 whens.append( 5181 self.expression( 5182 exp.When, 5183 matched=matched, 5184 source=source, 5185 condition=condition, 5186 then=then, 5187 ) 5188 ) 5189 return whens 5190 5191 def _parse_show(self) -> t.Optional[exp.Expression]: 5192 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5193 if parser: 5194 return parser(self) 5195 return self._parse_as_command(self._prev) 5196 5197 def _parse_set_item_assignment( 5198 self, kind: t.Optional[str] = None 5199 ) -> t.Optional[exp.Expression]: 5200 index = self._index 5201 5202 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 5203 
return self._parse_set_transaction(global_=kind == "GLOBAL") 5204 5205 left = self._parse_primary() or self._parse_id_var() 5206 assignment_delimiter = self._match_texts(("=", "TO")) 5207 5208 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5209 self._retreat(index) 5210 return None 5211 5212 right = self._parse_statement() or self._parse_id_var() 5213 this = self.expression(exp.EQ, this=left, expression=right) 5214 5215 return self.expression(exp.SetItem, this=this, kind=kind) 5216 5217 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5218 self._match_text_seq("TRANSACTION") 5219 characteristics = self._parse_csv( 5220 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5221 ) 5222 return self.expression( 5223 exp.SetItem, 5224 expressions=characteristics, 5225 kind="TRANSACTION", 5226 **{"global": global_}, # type: ignore 5227 ) 5228 5229 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5230 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5231 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5232 5233 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5234 index = self._index 5235 set_ = self.expression( 5236 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5237 ) 5238 5239 if self._curr: 5240 self._retreat(index) 5241 return self._parse_as_command(self._prev) 5242 5243 return set_ 5244 5245 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5246 for option in options: 5247 if self._match_text_seq(*option.split(" ")): 5248 return exp.var(option) 5249 return None 5250 5251 def _parse_as_command(self, start: Token) -> exp.Command: 5252 while self._curr: 5253 self._advance() 5254 text = self._find_sql(start, self._prev) 5255 size = len(start.text) 5256 return exp.Command(this=text[:size], expression=text[size:]) 5257 5258 def 
_parse_dict_property(self, this: str) -> exp.DictProperty: 5259 settings = [] 5260 5261 self._match_l_paren() 5262 kind = self._parse_id_var() 5263 5264 if self._match(TokenType.L_PAREN): 5265 while True: 5266 key = self._parse_id_var() 5267 value = self._parse_primary() 5268 5269 if not key and value is None: 5270 break 5271 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5272 self._match(TokenType.R_PAREN) 5273 5274 self._match_r_paren() 5275 5276 return self.expression( 5277 exp.DictProperty, 5278 this=this, 5279 kind=kind.this if kind else None, 5280 settings=settings, 5281 ) 5282 5283 def _parse_dict_range(self, this: str) -> exp.DictRange: 5284 self._match_l_paren() 5285 has_min = self._match_text_seq("MIN") 5286 if has_min: 5287 min = self._parse_var() or self._parse_primary() 5288 self._match_text_seq("MAX") 5289 max = self._parse_var() or self._parse_primary() 5290 else: 5291 max = self._parse_var() or self._parse_primary() 5292 min = exp.Literal.number(0) 5293 self._match_r_paren() 5294 return self.expression(exp.DictRange, this=this, min=min, max=max) 5295 5296 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5297 index = self._index 5298 expression = self._parse_column() 5299 if not self._match(TokenType.IN): 5300 self._retreat(index - 1) 5301 return None 5302 iterator = self._parse_column() 5303 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5304 return self.expression( 5305 exp.Comprehension, 5306 this=this, 5307 expression=expression, 5308 iterator=iterator, 5309 condition=condition, 5310 ) 5311 5312 def _find_parser( 5313 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5314 ) -> t.Optional[t.Callable]: 5315 if not self._curr: 5316 return None 5317 5318 index = self._index 5319 this = [] 5320 while True: 5321 # The current token might be multiple words 5322 curr = self._curr.text.upper() 5323 key = curr.split(" ") 5324 this.append(curr) 5325 5326 
self._advance() 5327 result, trie = in_trie(trie, key) 5328 if result == TrieResult.FAILED: 5329 break 5330 5331 if result == TrieResult.EXISTS: 5332 subparser = parsers[" ".join(this)] 5333 return subparser 5334 5335 self._retreat(index) 5336 return None 5337 5338 def _match(self, token_type, advance=True, expression=None): 5339 if not self._curr: 5340 return None 5341 5342 if self._curr.token_type == token_type: 5343 if advance: 5344 self._advance() 5345 self._add_comments(expression) 5346 return True 5347 5348 return None 5349 5350 def _match_set(self, types, advance=True): 5351 if not self._curr: 5352 return None 5353 5354 if self._curr.token_type in types: 5355 if advance: 5356 self._advance() 5357 return True 5358 5359 return None 5360 5361 def _match_pair(self, token_type_a, token_type_b, advance=True): 5362 if not self._curr or not self._next: 5363 return None 5364 5365 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5366 if advance: 5367 self._advance(2) 5368 return True 5369 5370 return None 5371 5372 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5373 if not self._match(TokenType.L_PAREN, expression=expression): 5374 self.raise_error("Expecting (") 5375 5376 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5377 if not self._match(TokenType.R_PAREN, expression=expression): 5378 self.raise_error("Expecting )") 5379 5380 def _match_texts(self, texts, advance=True): 5381 if self._curr and self._curr.text.upper() in texts: 5382 if advance: 5383 self._advance() 5384 return True 5385 return False 5386 5387 def _match_text_seq(self, *texts, advance=True): 5388 index = self._index 5389 for text in texts: 5390 if self._curr and self._curr.text.upper() == text: 5391 self._advance() 5392 else: 5393 self._retreat(index) 5394 return False 5395 5396 if not advance: 5397 self._retreat(index) 5398 5399 return True 5400 5401 @t.overload 5402 def 
_replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5403 ... 5404 5405 @t.overload 5406 def _replace_columns_with_dots( 5407 self, this: t.Optional[exp.Expression] 5408 ) -> t.Optional[exp.Expression]: 5409 ... 5410 5411 def _replace_columns_with_dots(self, this): 5412 if isinstance(this, exp.Dot): 5413 exp.replace_children(this, self._replace_columns_with_dots) 5414 elif isinstance(this, exp.Column): 5415 exp.replace_children(this, self._replace_columns_with_dots) 5416 table = this.args.get("table") 5417 this = ( 5418 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5419 ) 5420 5421 return this 5422 5423 def _replace_lambda( 5424 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5425 ) -> t.Optional[exp.Expression]: 5426 if not node: 5427 return node 5428 5429 for column in node.find_all(exp.Column): 5430 if column.parts[0].name in lambda_variables: 5431 dot_or_id = column.to_dot() if column.table else column.this 5432 parent = column.parent 5433 5434 while isinstance(parent, exp.Dot): 5435 if not isinstance(parent.parent, exp.Dot): 5436 parent.replace(dot_or_id) 5437 break 5438 parent = parent.parent 5439 else: 5440 if column is node: 5441 node = dot_or_id 5442 else: 5443 column.replace(dot_or_id) 5444 return node 5445 5446 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5447 return [ 5448 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5449 for value in values 5450 if value 5451 ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
952 def __init__( 953 self, 954 error_level: t.Optional[ErrorLevel] = None, 955 error_message_context: int = 100, 956 max_errors: int = 3, 957 ): 958 self.error_level = error_level or ErrorLevel.IMMEDIATE 959 self.error_message_context = error_message_context 960 self.max_errors = max_errors 961 self._tokenizer = self.TOKENIZER_CLASS() 962 self.reset()
974 def parse( 975 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 976 ) -> t.List[t.Optional[exp.Expression]]: 977 """ 978 Parses a list of tokens and returns a list of syntax trees, one tree 979 per parsed SQL statement. 980 981 Args: 982 raw_tokens: The list of tokens. 983 sql: The original SQL string, used to produce helpful debug messages. 984 985 Returns: 986 The list of the produced syntax trees. 987 """ 988 return self._parse( 989 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 990 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
992 def parse_into( 993 self, 994 expression_types: exp.IntoType, 995 raw_tokens: t.List[Token], 996 sql: t.Optional[str] = None, 997 ) -> t.List[t.Optional[exp.Expression]]: 998 """ 999 Parses a list of tokens into a given Expression type. If a collection of Expression 1000 types is given instead, this method will try to parse the token list into each one 1001 of them, stopping at the first for which the parsing succeeds. 1002 1003 Args: 1004 expression_types: The expression type(s) to try and parse the token list into. 1005 raw_tokens: The list of tokens. 1006 sql: The original SQL string, used to produce helpful debug messages. 1007 1008 Returns: 1009 The target Expression. 1010 """ 1011 errors = [] 1012 for expression_type in ensure_list(expression_types): 1013 parser = self.EXPRESSION_PARSERS.get(expression_type) 1014 if not parser: 1015 raise TypeError(f"No parser registered for {expression_type}") 1016 1017 try: 1018 return self._parse(parser, raw_tokens, sql) 1019 except ParseError as e: 1020 e.errors[0]["into_expression"] = expression_type 1021 errors.append(e) 1022 1023 raise ParseError( 1024 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1025 errors=merge_errors(errors), 1026 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1063 def check_errors(self) -> None: 1064 """Logs or raises any found errors, depending on the chosen error level setting.""" 1065 if self.error_level == ErrorLevel.WARN: 1066 for error in self.errors: 1067 logger.error(str(error)) 1068 elif self.error_level == ErrorLevel.RAISE and self.errors: 1069 raise ParseError( 1070 concat_messages(self.errors, self.max_errors), 1071 errors=merge_errors(self.errors), 1072 )
Logs or raises any found errors, depending on the chosen error level setting.
1074 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1075 """ 1076 Appends an error in the list of recorded errors or raises it, depending on the chosen 1077 error level setting. 1078 """ 1079 token = token or self._curr or self._prev or Token.string("") 1080 start = token.start 1081 end = token.end + 1 1082 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1083 highlight = self.sql[start:end] 1084 end_context = self.sql[end : end + self.error_message_context] 1085 1086 error = ParseError.new( 1087 f"{message}. Line {token.line}, Col: {token.col}.\n" 1088 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1089 description=message, 1090 line=token.line, 1091 col=token.col, 1092 start_context=start_context, 1093 highlight=highlight, 1094 end_context=end_context, 1095 ) 1096 1097 if self.error_level == ErrorLevel.IMMEDIATE: 1098 raise error 1099 1100 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1102 def expression( 1103 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1104 ) -> E: 1105 """ 1106 Creates a new, validated Expression. 1107 1108 Args: 1109 exp_class: The expression class to instantiate. 1110 comments: An optional list of comments to attach to the expression. 1111 kwargs: The arguments to set for the expression along with their respective values. 1112 1113 Returns: 1114 The target expression. 1115 """ 1116 instance = exp_class(**kwargs) 1117 instance.add_comments(comments) if comments else self._add_comments(instance) 1118 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1125 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1126 """ 1127 Validates an Expression, making sure that all its mandatory arguments are set. 1128 1129 Args: 1130 expression: The expression to validate. 1131 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1132 1133 Returns: 1134 The validated expression. 1135 """ 1136 if self.error_level != ErrorLevel.IGNORE: 1137 for error_message in expression.error_messages(args): 1138 self.raise_error(error_message) 1139 1140 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.