sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )
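
# Illustrative note (not part of the module): these helpers receive a function
# call's already-parsed argument list. VAR_MAP's arguments are interleaved as
# key1, value1, key2, value2, ..., and LIKE's arrive pattern-first, which is
# why parse_like swaps them into (this, expression) order. A minimal sketch:
#
#     node = parse_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#     assert isinstance(node, exp.VarMap)
#     assert node.args["keys"].expressions[0].name == "a"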
class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }
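
    # Illustrative sketch (an assumption based on how these class-level sets
    # are consumed): dialect-specific parsers subclass Parser and extend or
    # shrink the sets, e.g.
    #
    #     class MyDialectParser(Parser):  # hypothetical dialect parser
    #         TYPE_TOKENS = {*Parser.TYPE_TOKENS, TokenType.VAR}
    #         ID_VAR_TOKENS = Parser.ID_VAR_TOKENS - {TokenType.CASE}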
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }
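
    # Illustrative note: _parse_statement (defined further down) drives these
    # tables. It matches the current token against STATEMENT_PARSERS and
    # dispatches on it:
    #
    #     if self._match_set(self.STATEMENT_PARSERS):
    #         return self.STATEMENT_PARSERS[self._prev.token_type](self)
    #
    # so e.g. a leading CREATE token routes to self._parse_create().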
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
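
    # Illustrative note: these string keys are matched case-insensitively
    # against upcoming tokens; _parse_property (defined below) dispatches on
    # the matched text:
    #
    #     if self._match_texts(self.PROPERTY_PARSERS):
    #         return self.PROPERTY_PARSERS[self._prev.text.upper()](self)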
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether the behavior of a / b depends on the types of a and b.
    # False means a / b is always float division.
    # True means a / b is integer division if both a and b are integers.
    TYPED_DIVISION = False

    # False means 1 / 0 throws an error.
    # True means 1 / 0 returns null.
    SAFE_DIVISION = False
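
    # Illustrative sketch (hypothetical dialect): a parser for a dialect whose
    # "/" is integer division on integers and whose division by zero yields
    # NULL would simply flip the two flags above:
    #
    #     class MyDialectParser(Parser):  # hypothetical
    #         TYPED_DIVISION = True
    #         SAFE_DIVISION = True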
    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
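
    # Minimal usage sketch (assuming the default Tokenizer; one tree is
    # produced per statement):
    #
    #     parser = Parser()
    #     tokens = Tokenizer().tokenize("SELECT 1; SELECT 2")
    #     trees = parser.parse(tokens, sql="SELECT 1; SELECT 2")
    #     assert [t.sql() for t in trees] == ["SELECT 1", "SELECT 2"]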
1030 """ 1031 errors = [] 1032 for expression_type in ensure_list(expression_types): 1033 parser = self.EXPRESSION_PARSERS.get(expression_type) 1034 if not parser: 1035 raise TypeError(f"No parser registered for {expression_type}") 1036 1037 try: 1038 return self._parse(parser, raw_tokens, sql) 1039 except ParseError as e: 1040 e.errors[0]["into_expression"] = expression_type 1041 errors.append(e) 1042 1043 raise ParseError( 1044 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1045 errors=merge_errors(errors), 1046 ) from errors[-1] 1047 1048 def _parse( 1049 self, 1050 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1051 raw_tokens: t.List[Token], 1052 sql: t.Optional[str] = None, 1053 ) -> t.List[t.Optional[exp.Expression]]: 1054 self.reset() 1055 self.sql = sql or "" 1056 1057 total = len(raw_tokens) 1058 chunks: t.List[t.List[Token]] = [[]] 1059 1060 for i, token in enumerate(raw_tokens): 1061 if token.token_type == TokenType.SEMICOLON: 1062 if i < total - 1: 1063 chunks.append([]) 1064 else: 1065 chunks[-1].append(token) 1066 1067 expressions = [] 1068 1069 for tokens in chunks: 1070 self._index = -1 1071 self._tokens = tokens 1072 self._advance() 1073 1074 expressions.append(parse_method(self)) 1075 1076 if self._index < len(self._tokens): 1077 self.raise_error("Invalid expression / Unexpected token") 1078 1079 self.check_errors() 1080 1081 return expressions 1082 1083 def check_errors(self) -> None: 1084 """Logs or raises any found errors, depending on the chosen error level setting.""" 1085 if self.error_level == ErrorLevel.WARN: 1086 for error in self.errors: 1087 logger.error(str(error)) 1088 elif self.error_level == ErrorLevel.RAISE and self.errors: 1089 raise ParseError( 1090 concat_messages(self.errors, self.max_errors), 1091 errors=merge_errors(self.errors), 1092 ) 1093 1094 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1095 """ 1096 Appends an error in the list of recorded errors or raises it, depending on the chosen 1097 error level setting. 1098 """ 1099 token = token or self._curr or self._prev or Token.string("") 1100 start = token.start 1101 end = token.end + 1 1102 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1103 highlight = self.sql[start:end] 1104 end_context = self.sql[end : end + self.error_message_context] 1105 1106 error = ParseError.new( 1107 f"{message}. Line {token.line}, Col: {token.col}.\n" 1108 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1109 description=message, 1110 line=token.line, 1111 col=token.col, 1112 start_context=start_context, 1113 highlight=highlight, 1114 end_context=end_context, 1115 ) 1116 1117 if self.error_level == ErrorLevel.IMMEDIATE: 1118 raise error 1119 1120 self.errors.append(error) 1121 1122 def expression( 1123 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1124 ) -> E: 1125 """ 1126 Creates a new, validated Expression. 1127 1128 Args: 1129 exp_class: The expression class to instantiate. 1130 comments: An optional list of comments to attach to the expression. 1131 kwargs: The arguments to set for the expression along with their respective values. 1132 1133 Returns: 1134 The target expression. 
1135 """ 1136 instance = exp_class(**kwargs) 1137 instance.add_comments(comments) if comments else self._add_comments(instance) 1138 return self.validate_expression(instance) 1139 1140 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1141 if expression and self._prev_comments: 1142 expression.add_comments(self._prev_comments) 1143 self._prev_comments = None 1144 1145 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1146 """ 1147 Validates an Expression, making sure that all its mandatory arguments are set. 1148 1149 Args: 1150 expression: The expression to validate. 1151 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1152 1153 Returns: 1154 The validated expression. 1155 """ 1156 if self.error_level != ErrorLevel.IGNORE: 1157 for error_message in expression.error_messages(args): 1158 self.raise_error(error_message) 1159 1160 return expression 1161 1162 def _find_sql(self, start: Token, end: Token) -> str: 1163 return self.sql[start.start : end.end + 1] 1164 1165 def _advance(self, times: int = 1) -> None: 1166 self._index += times 1167 self._curr = seq_get(self._tokens, self._index) 1168 self._next = seq_get(self._tokens, self._index + 1) 1169 1170 if self._index > 0: 1171 self._prev = self._tokens[self._index - 1] 1172 self._prev_comments = self._prev.comments 1173 else: 1174 self._prev = None 1175 self._prev_comments = None 1176 1177 def _retreat(self, index: int) -> None: 1178 if index != self._index: 1179 self._advance(index - self._index) 1180 1181 def _parse_command(self) -> exp.Command: 1182 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1183 1184 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1185 start = self._prev 1186 exists = self._parse_exists() if allow_exists else None 1187 1188 self._match(TokenType.ON) 1189 1190 kind = self._match_set(self.CREATABLES) and self._prev 1191 if not kind: 1192 return self._parse_as_command(start) 1193 1194 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1195 this = self._parse_user_defined_function(kind=kind.token_type) 1196 elif kind.token_type == TokenType.TABLE: 1197 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1198 elif kind.token_type == TokenType.COLUMN: 1199 this = self._parse_column() 1200 else: 1201 this = self._parse_id_var() 1202 1203 self._match(TokenType.IS) 1204 1205 return self.expression( 1206 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1207 ) 1208 1209 def _parse_to_table( 1210 self, 1211 ) -> exp.ToTableProperty: 1212 table = self._parse_table_parts(schema=True) 1213 return self.expression(exp.ToTableProperty, this=table) 1214 1215 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1216 def _parse_ttl(self) -> exp.Expression: 1217 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1218 this = self._parse_bitwise() 1219 1220 if self._match_text_seq("DELETE"): 1221 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1222 if self._match_text_seq("RECOMPRESS"): 1223 return self.expression( 1224 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1225 ) 1226 if self._match_text_seq("TO", "DISK"): 1227 return self.expression( 1228 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1229 ) 1230 if self._match_text_seq("TO", "VOLUME"): 1231 return self.expression( 1232 
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
1452 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1453 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1454 "after": self._match_text_seq("AFTER"), 1455 "minimum": self._match_texts(("MIN", "MINIMUM")), 1456 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1457 } 1458 1459 if self._match_texts(self.PROPERTY_PARSERS): 1460 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1461 try: 1462 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1463 except TypeError: 1464 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1465 1466 return None 1467 1468 def _parse_property(self) -> t.Optional[exp.Expression]: 1469 if self._match_texts(self.PROPERTY_PARSERS): 1470 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1471 1472 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1473 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1474 1475 if self._match_text_seq("COMPOUND", "SORTKEY"): 1476 return self._parse_sortkey(compound=True) 1477 1478 if self._match_text_seq("SQL", "SECURITY"): 1479 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1480 1481 index = self._index 1482 key = self._parse_column() 1483 1484 if not self._match(TokenType.EQ): 1485 self._retreat(index) 1486 return None 1487 1488 return self.expression( 1489 exp.Property, 1490 this=key.to_dot() if isinstance(key, exp.Column) else key, 1491 value=self._parse_column() or self._parse_var(any_token=True), 1492 ) 1493 1494 def _parse_stored(self) -> exp.FileFormatProperty: 1495 self._match(TokenType.ALIAS) 1496 1497 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1498 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1499 1500 return self.expression( 1501 exp.FileFormatProperty, 1502 this=self.expression( 1503 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1504 ) 1505 if input_format or output_format 1506 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1507 ) 1508 1509 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1510 self._match(TokenType.EQ) 1511 self._match(TokenType.ALIAS) 1512 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1513 1514 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1515 properties = [] 1516 while True: 1517 if before: 1518 prop = self._parse_property_before() 1519 else: 1520 prop = self._parse_property() 1521 1522 if not prop: 1523 break 1524 for p in ensure_list(prop): 1525 properties.append(p) 1526 1527 if properties: 1528 return self.expression(exp.Properties, expressions=properties) 1529 1530 return None 1531 1532 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1533 return self.expression( 1534 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1535 ) 1536 1537 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1538 if self._index >= 2: 1539 pre_volatile_token = self._tokens[self._index - 2] 1540 else: 1541 pre_volatile_token = None 1542 1543 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1544 return exp.VolatileProperty() 1545 1546 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1547 1548 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1549 
    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)
    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
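
    # [Editor's sketch] Illustrative only: assuming the Postgres dialect wires
    # _parse_partitioned_of and _parse_partition_bound_spec into CREATE TABLE,
    # a declarative partition would parse along these lines:
    #
    #     import sqlglot
    #     ddl = sqlglot.parse_one(
    #         "CREATE TABLE p1 PARTITION OF t FOR VALUES FROM (1) TO (10)",
    #         read="postgres",
    #     )  # expected: a Create whose properties include a PartitionedOfProperty
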
    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )
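
    # [Editor's sketch] Illustrative only: _parse_insert defers the conflict clause to
    # _parse_on_conflict (defined below), so a Postgres upsert is expected to parse as:
    #
    #     import sqlglot
    #     ins = sqlglot.parse_one(
    #         "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING",
    #         read="postgres",
    #     )
    #     print(ins.args["conflict"])  # an exp.OnConflict node with nothing=True
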
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )
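
    # [Editor's sketch] Illustrative only: the DELETE parser above picks up an optional
    # RETURNING clause via _parse_returning, e.g.:
    #
    #     import sqlglot
    #     stmt = sqlglot.parse_one("DELETE FROM t WHERE x = 1 RETURNING id", read="postgres")
    #     print(stmt.args["returning"])  # an exp.Returning node
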
    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In Presto we can have VALUES 1, 2, which results in 1 column and 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports a leading FROM, e.g. FROM x SELECT ...
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)
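
    # [Editor's sketch] Illustrative only: the leading-FROM branch of _parse_select
    # above lets the DuckDB dialect accept a bare FROM clause:
    #
    #     import sqlglot
    #     q = sqlglot.parse_one("FROM tbl", read="duckdb")
    #     print(q.sql())  # SELECT * FROM tbl
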
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )
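
    # [Editor's sketch] Illustrative only: _parse_with collects the CTE list and
    # _parse_select attaches it to the statement that follows, so:
    #
    #     import sqlglot
    #     q = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
    #     print(q.args["with"])  # an exp.With holding one exp.CTE aliased "c"
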
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this
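
    # [Editor's sketch] Illustrative only: _parse_join above records the join
    # side/kind as plain strings on the exp.Join node:
    #
    #     import sqlglot
    #     q = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id")
    #     join = q.args["joins"][0]
    #     print(join.args["side"], join.args["on"])  # LEFT, the ON condition
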
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema) or ""

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
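
    # [Editor's sketch] Illustrative only: _parse_table_parts above splits a dotted
    # name into catalog, db and table:
    #
    #     import sqlglot
    #     t = sqlglot.parse_one("SELECT * FROM c.d.t").find(sqlglot.exp.Table)
    #     print(t.catalog, t.db, t.name)  # c d t
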
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
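
    # [Editor's sketch] Illustrative only: assuming the BigQuery dialect, the
    # WITH OFFSET handling in _parse_unnest above is expected to capture the
    # offset alias:
    #
    #     import sqlglot
    #     q = sqlglot.parse_one("SELECT * FROM UNNEST([1, 2]) AS x WITH OFFSET AS o", read="bigquery")
    #     print(q.find(sqlglot.exp.Unnest).args["offset"])  # identifier "o"
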
    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )
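
    # [Editor's sketch] Illustrative only: _parse_derived_table_values above handles a
    # parenthesized VALUES list used as a table:
    #
    #     import sqlglot
    #     q = sqlglot.parse_one("SELECT * FROM (VALUES (1), (2)) AS t(a)")
    #     print(q.find(sqlglot.exp.Values).alias)  # t
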
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)
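
    # [Editor's sketch] Illustrative only: _parse_group above accumulates plain
    # expressions, GROUPING SETS, ROLLUP and CUBE into one exp.Group node:
    #
    #     import sqlglot
    #     q = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)")
    #     print(q.args["group"].args["rollup"])  # the ROLLUP column list
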
    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
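
    # [Editor's sketch] Illustrative only: _parse_ordered above normalizes the null
    # ordering into a nulls_first flag:
    #
    #     import sqlglot
    #     q = sqlglot.parse_one("SELECT * FROM t ORDER BY x DESC NULLS LAST")
    #     ordered = q.args["order"].expressions[0]
    #     print(ordered.args["desc"], ordered.args["nulls_first"])  # True False
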
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            comments=self._prev.comments,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
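
    # [Editor's sketch] Illustrative only: with MySQL's LIMIT <offset>, <count> form,
    # _parse_limit above stores the first term as an offset, which
    # _parse_query_modifiers then lifts into a separate exp.Offset node:
    #
    #     import sqlglot
    #     q = sqlglot.parse_one("SELECT x FROM t LIMIT 5, 10", read="mysql")
    #     print(q.args["limit"].expression, q.args["offset"].expression)  # 10, 5
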
    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
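
    # [Editor's sketch] Illustrative only: BETWEEN is routed through RANGE_PARSERS to
    # _parse_between above, producing a single exp.Between node:
    #
    #     import sqlglot
    #     q = sqlglot.parse_one("SELECT * FROM t WHERE x BETWEEN 1 AND 10")
    #     between = q.find(sqlglot.exp.Between)
    #     print(between.args["low"], between.args["high"])  # 1, 10
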
a "window side") 3378 unit = None 3379 self._retreat(self._index - 1) 3380 3381 this = exp.Literal.string(parts[0]) 3382 unit = self.expression(exp.Var, this=parts[1]) 3383 3384 return self.expression(exp.Interval, this=this, unit=unit) 3385 3386 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3387 this = self._parse_term() 3388 3389 while True: 3390 if self._match_set(self.BITWISE): 3391 this = self.expression( 3392 self.BITWISE[self._prev.token_type], 3393 this=this, 3394 expression=self._parse_term(), 3395 ) 3396 elif self._match(TokenType.DQMARK): 3397 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3398 elif self._match_pair(TokenType.LT, TokenType.LT): 3399 this = self.expression( 3400 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3401 ) 3402 elif self._match_pair(TokenType.GT, TokenType.GT): 3403 this = self.expression( 3404 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3405 ) 3406 else: 3407 break 3408 3409 return this 3410 3411 def _parse_term(self) -> t.Optional[exp.Expression]: 3412 return self._parse_tokens(self._parse_factor, self.TERM) 3413 3414 def _parse_factor(self) -> t.Optional[exp.Expression]: 3415 if self.EXPONENT: 3416 factor = self._parse_tokens(self._parse_exponent, self.FACTOR) 3417 else: 3418 factor = self._parse_tokens(self._parse_unary, self.FACTOR) 3419 if isinstance(factor, exp.Div): 3420 factor.args["typed"] = self.TYPED_DIVISION 3421 factor.args["safe"] = self.SAFE_DIVISION 3422 return factor 3423 3424 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3425 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3426 3427 def _parse_unary(self) -> t.Optional[exp.Expression]: 3428 if self._match_set(self.UNARY_PARSERS): 3429 return self.UNARY_PARSERS[self._prev.token_type](self) 3430 return self._parse_at_time_zone(self._parse_type()) 3431 3432 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3433 interval = parse_interval and self._parse_interval() 3434 if interval: 3435 return interval 3436 3437 index = self._index 3438 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3439 this = self._parse_column() 3440 3441 if data_type: 3442 if isinstance(this, exp.Literal): 3443 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3444 if parser: 3445 return parser(self, this, data_type) 3446 return self.expression(exp.Cast, this=this, to=data_type) 3447 if not data_type.expressions: 3448 self._retreat(index) 3449 return self._parse_column() 3450 return self._parse_column_ops(data_type) 3451 3452 return this and self._parse_column_ops(this) 3453 3454 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3455 this = self._parse_type() 3456 if not this: 3457 return None 3458 3459 return self.expression( 3460 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3461 ) 3462 3463 def _parse_types( 3464 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3465 ) -> t.Optional[exp.Expression]: 3466 index = self._index 3467 3468 prefix = self._match_text_seq("SYSUDTLIB", ".") 3469 3470 if not self._match_set(self.TYPE_TOKENS): 3471 identifier = allow_identifiers and self._parse_id_var( 3472 any_token=False, tokens=(TokenType.VAR,) 3473 ) 3474 3475 if identifier: 3476 tokens = self._tokenizer.tokenize(identifier.name) 3477 3478 if len(tokens) != 1: 3479 self.raise_error("Unexpected identifier", self._prev) 3480 3481 if tokens[0].token_type in self.TYPE_TOKENS: 3482 self._prev = 
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token
3594 3595 this = exp.DataType( 3596 this=exp.DataType.Type[type_token.value], 3597 expressions=expressions, 3598 nested=nested, 3599 values=values, 3600 prefix=prefix, 3601 ) 3602 3603 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3604 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3605 3606 return this 3607 3608 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3609 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3610 self._match(TokenType.COLON) 3611 return self._parse_column_def(this) 3612 3613 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3614 if not self._match_text_seq("AT", "TIME", "ZONE"): 3615 return this 3616 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3617 3618 def _parse_column(self) -> t.Optional[exp.Expression]: 3619 this = self._parse_field() 3620 if isinstance(this, exp.Identifier): 3621 this = self.expression(exp.Column, this=this) 3622 elif not this: 3623 return self._parse_bracket(this) 3624 return self._parse_column_ops(this) 3625 3626 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3627 this = self._parse_bracket(this) 3628 3629 while self._match_set(self.COLUMN_OPERATORS): 3630 op_token = self._prev.token_type 3631 op = self.COLUMN_OPERATORS.get(op_token) 3632 3633 if op_token == TokenType.DCOLON: 3634 field = self._parse_types() 3635 if not field: 3636 self.raise_error("Expected type") 3637 elif op and self._curr: 3638 self._advance() 3639 value = self._prev.text 3640 field = ( 3641 exp.Literal.number(value) 3642 if self._prev.token_type == TokenType.NUMBER 3643 else exp.Literal.string(value) 3644 ) 3645 else: 3646 field = self._parse_field(anonymous_func=True, any_token=True) 3647 3648 if isinstance(field, exp.Func): 3649 # bigquery allows function calls like x.y.count(...) 3650 # SAFE.SUBSTR(...) 
3651 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3652 this = self._replace_columns_with_dots(this) 3653 3654 if op: 3655 this = op(self, this, field) 3656 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3657 this = self.expression( 3658 exp.Column, 3659 this=field, 3660 table=this.this, 3661 db=this.args.get("table"), 3662 catalog=this.args.get("db"), 3663 ) 3664 else: 3665 this = self.expression(exp.Dot, this=this, expression=field) 3666 this = self._parse_bracket(this) 3667 return this 3668 3669 def _parse_primary(self) -> t.Optional[exp.Expression]: 3670 if self._match_set(self.PRIMARY_PARSERS): 3671 token_type = self._prev.token_type 3672 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3673 3674 if token_type == TokenType.STRING: 3675 expressions = [primary] 3676 while self._match(TokenType.STRING): 3677 expressions.append(exp.Literal.string(self._prev.text)) 3678 3679 if len(expressions) > 1: 3680 return self.expression(exp.Concat, expressions=expressions) 3681 3682 return primary 3683 3684 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3685 return exp.Literal.number(f"0.{self._prev.text}") 3686 3687 if self._match(TokenType.L_PAREN): 3688 comments = self._prev_comments 3689 query = self._parse_select() 3690 3691 if query: 3692 expressions = [query] 3693 else: 3694 expressions = self._parse_expressions() 3695 3696 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3697 3698 if isinstance(this, exp.Subqueryable): 3699 this = self._parse_set_operations( 3700 self._parse_subquery(this=this, parse_alias=False) 3701 ) 3702 elif len(expressions) > 1: 3703 this = self.expression(exp.Tuple, expressions=expressions) 3704 else: 3705 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3706 3707 if this: 3708 this.add_comments(comments) 3709 3710 self._match_r_paren(expression=this) 3711 return this 3712 3713 return None 3714 3715 def _parse_field( 3716 self, 3717 any_token: bool = False, 3718 tokens: t.Optional[t.Collection[TokenType]] = None, 3719 anonymous_func: bool = False, 3720 ) -> t.Optional[exp.Expression]: 3721 return ( 3722 self._parse_primary() 3723 or self._parse_function(anonymous=anonymous_func) 3724 or self._parse_id_var(any_token=any_token, tokens=tokens) 3725 ) 3726 3727 def _parse_function( 3728 self, 3729 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3730 anonymous: bool = False, 3731 optional_parens: bool = True, 3732 ) -> t.Optional[exp.Expression]: 3733 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3734 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3735 fn_syntax = False 3736 if ( 3737 self._match(TokenType.L_BRACE, advance=False) 3738 and self._next 3739 and self._next.text.upper() == "FN" 3740 ): 3741 self._advance(2) 3742 fn_syntax = True 3743 3744 func = self._parse_function_call( 3745 functions=functions, anonymous=anonymous, optional_parens=optional_parens 3746 ) 3747 3748 if fn_syntax: 3749 self._match(TokenType.R_BRACE) 3750 3751 return func 3752 3753 def _parse_function_call( 3754 self, 3755 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3756 anonymous: bool = False, 3757 optional_parens: bool = True, 3758 ) -> t.Optional[exp.Expression]: 3759 if not self._curr: 3760 return None 3761 3762 token_type = self._curr.token_type 3763 this = self._curr.text 3764 upper = this.upper() 3765 3766 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3767 if 
optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3768 self._advance() 3769 return parser(self) 3770 3771 if not self._next or self._next.token_type != TokenType.L_PAREN: 3772 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3773 self._advance() 3774 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3775 3776 return None 3777 3778 if token_type not in self.FUNC_TOKENS: 3779 return None 3780 3781 self._advance(2) 3782 3783 parser = self.FUNCTION_PARSERS.get(upper) 3784 if parser and not anonymous: 3785 this = parser(self) 3786 else: 3787 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3788 3789 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3790 this = self.expression(subquery_predicate, this=self._parse_select()) 3791 self._match_r_paren() 3792 return this 3793 3794 if functions is None: 3795 functions = self.FUNCTIONS 3796 3797 function = functions.get(upper) 3798 3799 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3800 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3801 3802 if function and not anonymous: 3803 func = self.validate_expression(function(args), args) 3804 if not self.NORMALIZE_FUNCTIONS: 3805 func.meta["name"] = this 3806 this = func 3807 else: 3808 this = self.expression(exp.Anonymous, this=this, expressions=args) 3809 3810 self._match_r_paren(this) 3811 return self._parse_window(this) 3812 3813 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3814 return self._parse_column_def(self._parse_id_var()) 3815 3816 def _parse_user_defined_function( 3817 self, kind: t.Optional[TokenType] = None 3818 ) -> t.Optional[exp.Expression]: 3819 this = self._parse_id_var() 3820 3821 while self._match(TokenType.DOT): 3822 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3823 3824 if not self._match(TokenType.L_PAREN): 3825 return this 3826 3827 expressions = self._parse_csv(self._parse_function_parameter) 3828 self._match_r_paren() 3829 return self.expression( 3830 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3831 ) 3832 3833 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3834 literal = self._parse_primary() 3835 if literal: 3836 return self.expression(exp.Introducer, this=token.text, expression=literal) 3837 3838 return self.expression(exp.Identifier, this=token.text) 3839 3840 def _parse_session_parameter(self) -> exp.SessionParameter: 3841 kind = None 3842 this = self._parse_id_var() or self._parse_primary() 3843 3844 if this and self._match(TokenType.DOT): 3845 kind = this.name 3846 this = self._parse_var() or self._parse_primary() 3847 3848 return self.expression(exp.SessionParameter, this=this, kind=kind) 3849 3850 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3851 index = self._index 3852 3853 if self._match(TokenType.L_PAREN): 3854 expressions = t.cast( 3855 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3856 ) 3857 3858 if not self._match(TokenType.R_PAREN): 3859 self._retreat(index) 3860 else: 3861 expressions = [self._parse_id_var()] 3862 3863 if self._match_set(self.LAMBDAS): 3864 return self.LAMBDAS[self._prev.token_type](self, expressions) 3865 3866 self._retreat(index) 3867 3868 this: t.Optional[exp.Expression] 3869 3870 if self._match(TokenType.DISTINCT): 3871 this = self.expression( 3872 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3873 ) 3874 else: 3875 this = 
self._parse_select_or_expression(alias=alias) 3876 3877 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3878 3879 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3880 index = self._index 3881 3882 if not self.errors: 3883 try: 3884 if self._parse_select(nested=True): 3885 return this 3886 except ParseError: 3887 pass 3888 finally: 3889 self.errors.clear() 3890 self._retreat(index) 3891 3892 if not self._match(TokenType.L_PAREN): 3893 return this 3894 3895 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3896 3897 self._match_r_paren() 3898 return self.expression(exp.Schema, this=this, expressions=args) 3899 3900 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3901 return self._parse_column_def(self._parse_field(any_token=True)) 3902 3903 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3904 # column defs are not really columns, they're identifiers 3905 if isinstance(this, exp.Column): 3906 this = this.this 3907 3908 kind = self._parse_types(schema=True) 3909 3910 if self._match_text_seq("FOR", "ORDINALITY"): 3911 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3912 3913 constraints: t.List[exp.Expression] = [] 3914 3915 if not kind and self._match(TokenType.ALIAS): 3916 constraints.append( 3917 self.expression( 3918 exp.ComputedColumnConstraint, 3919 this=self._parse_conjunction(), 3920 persisted=self._match_text_seq("PERSISTED"), 3921 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3922 ) 3923 ) 3924 3925 while True: 3926 constraint = self._parse_column_constraint() 3927 if not constraint: 3928 break 3929 constraints.append(constraint) 3930 3931 if not kind and not constraints: 3932 return this 3933 3934 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3935 3936 def _parse_auto_increment( 3937 self, 3938 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3939 start = None 3940 increment = None 3941 3942 if self._match(TokenType.L_PAREN, advance=False): 3943 args = self._parse_wrapped_csv(self._parse_bitwise) 3944 start = seq_get(args, 0) 3945 increment = seq_get(args, 1) 3946 elif self._match_text_seq("START"): 3947 start = self._parse_bitwise() 3948 self._match_text_seq("INCREMENT") 3949 increment = self._parse_bitwise() 3950 3951 if start and increment: 3952 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3953 3954 return exp.AutoIncrementColumnConstraint() 3955 3956 def _parse_compress(self) -> exp.CompressColumnConstraint: 3957 if self._match(TokenType.L_PAREN, advance=False): 3958 return self.expression( 3959 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3960 ) 3961 3962 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3963 3964 def _parse_generated_as_identity( 3965 self, 3966 ) -> ( 3967 exp.GeneratedAsIdentityColumnConstraint 3968 | exp.ComputedColumnConstraint 3969 | exp.GeneratedAsRowColumnConstraint 3970 ): 3971 if self._match_text_seq("BY", "DEFAULT"): 3972 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3973 this = self.expression( 3974 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3975 ) 3976 else: 3977 self._match_text_seq("ALWAYS") 3978 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3979 3980 self._match(TokenType.ALIAS) 3981 3982 if 
self._match_text_seq("ROW"): 3983 start = self._match_text_seq("START") 3984 if not start: 3985 self._match(TokenType.END) 3986 hidden = self._match_text_seq("HIDDEN") 3987 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 3988 3989 identity = self._match_text_seq("IDENTITY") 3990 3991 if self._match(TokenType.L_PAREN): 3992 if self._match(TokenType.START_WITH): 3993 this.set("start", self._parse_bitwise()) 3994 if self._match_text_seq("INCREMENT", "BY"): 3995 this.set("increment", self._parse_bitwise()) 3996 if self._match_text_seq("MINVALUE"): 3997 this.set("minvalue", self._parse_bitwise()) 3998 if self._match_text_seq("MAXVALUE"): 3999 this.set("maxvalue", self._parse_bitwise()) 4000 4001 if self._match_text_seq("CYCLE"): 4002 this.set("cycle", True) 4003 elif self._match_text_seq("NO", "CYCLE"): 4004 this.set("cycle", False) 4005 4006 if not identity: 4007 this.set("expression", self._parse_bitwise()) 4008 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4009 args = self._parse_csv(self._parse_bitwise) 4010 this.set("start", seq_get(args, 0)) 4011 this.set("increment", seq_get(args, 1)) 4012 4013 self._match_r_paren() 4014 4015 return this 4016 4017 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4018 self._match_text_seq("LENGTH") 4019 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4020 4021 def _parse_not_constraint( 4022 self, 4023 ) -> t.Optional[exp.Expression]: 4024 if self._match_text_seq("NULL"): 4025 return self.expression(exp.NotNullColumnConstraint) 4026 if self._match_text_seq("CASESPECIFIC"): 4027 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4028 if self._match_text_seq("FOR", "REPLICATION"): 4029 return self.expression(exp.NotForReplicationColumnConstraint) 4030 return None 4031 4032 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4033 if self._match(TokenType.CONSTRAINT): 4034 this = self._parse_id_var() 4035 else: 4036 this = None 4037 4038 if self._match_texts(self.CONSTRAINT_PARSERS): 4039 return self.expression( 4040 exp.ColumnConstraint, 4041 this=this, 4042 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4043 ) 4044 4045 return this 4046 4047 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4048 if not self._match(TokenType.CONSTRAINT): 4049 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4050 4051 this = self._parse_id_var() 4052 expressions = [] 4053 4054 while True: 4055 constraint = self._parse_unnamed_constraint() or self._parse_function() 4056 if not constraint: 4057 break 4058 expressions.append(constraint) 4059 4060 return self.expression(exp.Constraint, this=this, expressions=expressions) 4061 4062 def _parse_unnamed_constraint( 4063 self, constraints: t.Optional[t.Collection[str]] = None 4064 ) -> t.Optional[exp.Expression]: 4065 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4066 constraints or self.CONSTRAINT_PARSERS 4067 ): 4068 return None 4069 4070 constraint = self._prev.text.upper() 4071 if constraint not in self.CONSTRAINT_PARSERS: 4072 self.raise_error(f"No parser found for schema constraint {constraint}.") 4073 4074 return self.CONSTRAINT_PARSERS[constraint](self) 4075 4076 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4077 self._match_text_seq("KEY") 4078 return self.expression( 4079 exp.UniqueColumnConstraint, 4080 this=self._parse_schema(self._parse_id_var(any_token=False)), 4081 
index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4082 ) 4083 4084 def _parse_key_constraint_options(self) -> t.List[str]: 4085 options = [] 4086 while True: 4087 if not self._curr: 4088 break 4089 4090 if self._match(TokenType.ON): 4091 action = None 4092 on = self._advance_any() and self._prev.text 4093 4094 if self._match_text_seq("NO", "ACTION"): 4095 action = "NO ACTION" 4096 elif self._match_text_seq("CASCADE"): 4097 action = "CASCADE" 4098 elif self._match_text_seq("RESTRICT"): 4099 action = "RESTRICT" 4100 elif self._match_pair(TokenType.SET, TokenType.NULL): 4101 action = "SET NULL" 4102 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4103 action = "SET DEFAULT" 4104 else: 4105 self.raise_error("Invalid key constraint") 4106 4107 options.append(f"ON {on} {action}") 4108 elif self._match_text_seq("NOT", "ENFORCED"): 4109 options.append("NOT ENFORCED") 4110 elif self._match_text_seq("DEFERRABLE"): 4111 options.append("DEFERRABLE") 4112 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4113 options.append("INITIALLY DEFERRED") 4114 elif self._match_text_seq("NORELY"): 4115 options.append("NORELY") 4116 elif self._match_text_seq("MATCH", "FULL"): 4117 options.append("MATCH FULL") 4118 else: 4119 break 4120 4121 return options 4122 4123 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4124 if match and not self._match(TokenType.REFERENCES): 4125 return None 4126 4127 expressions = None 4128 this = self._parse_table(schema=True) 4129 options = self._parse_key_constraint_options() 4130 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4131 4132 def _parse_foreign_key(self) -> exp.ForeignKey: 4133 expressions = self._parse_wrapped_id_vars() 4134 reference = self._parse_references() 4135 options = {} 4136 4137 while self._match(TokenType.ON): 4138 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4139 self.raise_error("Expected DELETE or UPDATE") 4140 4141 kind = self._prev.text.lower() 4142 4143 if self._match_text_seq("NO", "ACTION"): 4144 action = "NO ACTION" 4145 elif self._match(TokenType.SET): 4146 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4147 action = "SET " + self._prev.text.upper() 4148 else: 4149 self._advance() 4150 action = self._prev.text.upper() 4151 4152 options[kind] = action 4153 4154 return self.expression( 4155 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4156 ) 4157 4158 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4159 return self._parse_field() 4160 4161 def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint: 4162 self._match(TokenType.TIMESTAMP_SNAPSHOT) 4163 4164 id_vars = self._parse_wrapped_id_vars() 4165 return self.expression( 4166 exp.PeriodForSystemTimeConstraint, 4167 this=seq_get(id_vars, 0), 4168 expression=seq_get(id_vars, 1), 4169 ) 4170 4171 def _parse_primary_key( 4172 self, wrapped_optional: bool = False, in_props: bool = False 4173 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4174 desc = ( 4175 self._match_set((TokenType.ASC, TokenType.DESC)) 4176 and self._prev.token_type == TokenType.DESC 4177 ) 4178 4179 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4180 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4181 4182 expressions = self._parse_wrapped_csv( 4183 self._parse_primary_key_part, optional=wrapped_optional 4184 ) 4185 options = self._parse_key_constraint_options() 4186 
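# Illustration (hypothetical, not part of the original source): a table-level
# constraint such as PRIMARY KEY (a, b) NOT ENFORCED arrives at the return
# below with the wrapped column parts parsed into `expressions` and the string
# "NOT ENFORCED" collected into `options` by _parse_key_constraint_options.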
return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4187 4188 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4189 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4190 return this 4191 4192 bracket_kind = self._prev.token_type 4193 4194 if self._match(TokenType.COLON): 4195 expressions: t.List[exp.Expression] = [ 4196 self.expression(exp.Slice, expression=self._parse_conjunction()) 4197 ] 4198 else: 4199 expressions = self._parse_csv( 4200 lambda: self._parse_slice( 4201 self._parse_alias(self._parse_conjunction(), explicit=True) 4202 ) 4203 ) 4204 4205 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4206 self.raise_error("Expected ]") 4207 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4208 self.raise_error("Expected }") 4209 4210 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4211 if bracket_kind == TokenType.L_BRACE: 4212 this = self.expression(exp.Struct, expressions=expressions) 4213 elif not this or this.name.upper() == "ARRAY": 4214 this = self.expression(exp.Array, expressions=expressions) 4215 else: 4216 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4217 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4218 4219 self._add_comments(this) 4220 return self._parse_bracket(this) 4221 4222 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4223 if self._match(TokenType.COLON): 4224 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4225 return this 4226 4227 def _parse_case(self) -> t.Optional[exp.Expression]: 4228 ifs = [] 4229 default = None 4230 4231 comments = self._prev_comments 4232 expression = self._parse_conjunction() 4233 4234 while self._match(TokenType.WHEN): 4235 this = self._parse_conjunction() 4236 self._match(TokenType.THEN) 4237 then = self._parse_conjunction() 4238 ifs.append(self.expression(exp.If, this=this, true=then)) 4239 4240 if self._match(TokenType.ELSE): 4241 default = self._parse_conjunction() 4242 4243 if not self._match(TokenType.END): 4244 self.raise_error("Expected END after CASE", self._prev) 4245 4246 return self._parse_window( 4247 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4248 ) 4249 4250 def _parse_if(self) -> t.Optional[exp.Expression]: 4251 if self._match(TokenType.L_PAREN): 4252 args = self._parse_csv(self._parse_conjunction) 4253 this = self.validate_expression(exp.If.from_arg_list(args), args) 4254 self._match_r_paren() 4255 else: 4256 index = self._index - 1 4257 condition = self._parse_conjunction() 4258 4259 if not condition: 4260 self._retreat(index) 4261 return None 4262 4263 self._match(TokenType.THEN) 4264 true = self._parse_conjunction() 4265 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4266 self._match(TokenType.END) 4267 this = self.expression(exp.If, this=condition, true=true, false=false) 4268 4269 return self._parse_window(this) 4270 4271 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4272 if not self._match_text_seq("VALUE", "FOR"): 4273 self._retreat(self._index - 1) 4274 return None 4275 4276 return self.expression( 4277 exp.NextValueFor, 4278 this=self._parse_column(), 4279 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4280 ) 4281 4282 def _parse_extract(self) -> exp.Extract: 4283 this = self._parse_function() or 
self._parse_var() or self._parse_type() 4284 4285 if self._match(TokenType.FROM): 4286 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4287 4288 if not self._match(TokenType.COMMA): 4289 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4290 4291 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4292 4293 def _parse_any_value(self) -> exp.AnyValue: 4294 this = self._parse_lambda() 4295 is_max = None 4296 having = None 4297 4298 if self._match(TokenType.HAVING): 4299 self._match_texts(("MAX", "MIN")) 4300 is_max = self._prev.text == "MAX" 4301 having = self._parse_column() 4302 4303 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4304 4305 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4306 this = self._parse_conjunction() 4307 4308 if not self._match(TokenType.ALIAS): 4309 if self._match(TokenType.COMMA): 4310 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4311 4312 self.raise_error("Expected AS after CAST") 4313 4314 fmt = None 4315 to = self._parse_types() 4316 4317 if self._match(TokenType.FORMAT): 4318 fmt_string = self._parse_string() 4319 fmt = self._parse_at_time_zone(fmt_string) 4320 4321 if not to: 4322 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4323 if to.this in exp.DataType.TEMPORAL_TYPES: 4324 this = self.expression( 4325 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4326 this=this, 4327 format=exp.Literal.string( 4328 format_time( 4329 fmt_string.this if fmt_string else "", 4330 self.FORMAT_MAPPING or self.TIME_MAPPING, 4331 self.FORMAT_TRIE or self.TIME_TRIE, 4332 ) 4333 ), 4334 ) 4335 4336 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4337 this.set("zone", fmt.args["zone"]) 4338 return this 4339 elif not to: 4340 self.raise_error("Expected TYPE after CAST") 4341 elif isinstance(to, exp.Identifier): 4342 to = exp.DataType.build(to.name, udt=True) 4343 elif to.this == exp.DataType.Type.CHAR: 4344 if self._match(TokenType.CHARACTER_SET): 4345 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4346 4347 return self.expression( 4348 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4349 ) 4350 4351 def _parse_concat(self) -> t.Optional[exp.Expression]: 4352 args = self._parse_csv(self._parse_conjunction) 4353 if self.CONCAT_NULL_OUTPUTS_STRING: 4354 args = self._ensure_string_if_null(args) 4355 4356 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4357 # we find such a call we replace it with its argument. 
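# Illustration (hypothetical, not part of the original source): with the
# default dialect this collapse looks roughly like:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT CONCAT(a)").sql()
#     'SELECT a'
#
# (exact behaviour depends on each dialect's CONCAT settings and the version)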
4358 if len(args) == 1: 4359 return args[0] 4360 4361 return self.expression( 4362 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4363 ) 4364 4365 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4366 args = self._parse_csv(self._parse_conjunction) 4367 if len(args) < 2: 4368 return self.expression(exp.ConcatWs, expressions=args) 4369 delim, *values = args 4370 if self.CONCAT_NULL_OUTPUTS_STRING: 4371 values = self._ensure_string_if_null(values) 4372 4373 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4374 4375 def _parse_string_agg(self) -> exp.Expression: 4376 if self._match(TokenType.DISTINCT): 4377 args: t.List[t.Optional[exp.Expression]] = [ 4378 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4379 ] 4380 if self._match(TokenType.COMMA): 4381 args.extend(self._parse_csv(self._parse_conjunction)) 4382 else: 4383 args = self._parse_csv(self._parse_conjunction) # type: ignore 4384 4385 index = self._index 4386 if not self._match(TokenType.R_PAREN) and args: 4387 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4388 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4389 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4390 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4391 4392 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4393 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4394 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4395 if not self._match_text_seq("WITHIN", "GROUP"): 4396 self._retreat(index) 4397 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4398 4399 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4400 order = self._parse_order(this=seq_get(args, 0)) 4401 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4402 4403 def _parse_convert( 4404 self, strict: bool, safe: t.Optional[bool] = None 4405 ) -> t.Optional[exp.Expression]: 4406 this = self._parse_bitwise() 4407 4408 if self._match(TokenType.USING): 4409 to: t.Optional[exp.Expression] = self.expression( 4410 exp.CharacterSet, this=self._parse_var() 4411 ) 4412 elif self._match(TokenType.COMMA): 4413 to = self._parse_types() 4414 else: 4415 to = None 4416 4417 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4418 4419 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4420 """ 4421 There are generally two variants of the DECODE function: 4422 4423 - DECODE(bin, charset) 4424 - DECODE(expression, search, result [, search, result] ... [, default]) 4425 4426 The second variant will always be parsed into a CASE expression. Note that NULL 4427 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4428 instead of relying on pattern matching. 
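Illustrative example (an editor's sketch; the exact output can vary by
dialect and sqlglot version): DECODE(x, 1, 'one', NULL, 'none', 'other')
is parsed roughly as
CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END.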
4429 """ 4430 args = self._parse_csv(self._parse_conjunction) 4431 4432 if len(args) < 3: 4433 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4434 4435 expression, *expressions = args 4436 if not expression: 4437 return None 4438 4439 ifs = [] 4440 for search, result in zip(expressions[::2], expressions[1::2]): 4441 if not search or not result: 4442 return None 4443 4444 if isinstance(search, exp.Literal): 4445 ifs.append( 4446 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4447 ) 4448 elif isinstance(search, exp.Null): 4449 ifs.append( 4450 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4451 ) 4452 else: 4453 cond = exp.or_( 4454 exp.EQ(this=expression.copy(), expression=search), 4455 exp.and_( 4456 exp.Is(this=expression.copy(), expression=exp.Null()), 4457 exp.Is(this=search.copy(), expression=exp.Null()), 4458 copy=False, 4459 ), 4460 copy=False, 4461 ) 4462 ifs.append(exp.If(this=cond, true=result)) 4463 4464 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4465 4466 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4467 self._match_text_seq("KEY") 4468 key = self._parse_column() 4469 self._match_set((TokenType.COLON, TokenType.COMMA)) 4470 self._match_text_seq("VALUE") 4471 value = self._parse_bitwise() 4472 4473 if not key and not value: 4474 return None 4475 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4476 4477 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4478 if not this or not self._match_text_seq("FORMAT", "JSON"): 4479 return this 4480 4481 return self.expression(exp.FormatJson, this=this) 4482 4483 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4484 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4485 for value in values: 4486 if self._match_text_seq(value, "ON", on): 4487 return f"{value} ON {on}" 4488 4489 return None 4490 4491 def _parse_json_object(self) -> exp.JSONObject: 4492 star = self._parse_star() 4493 expressions = ( 4494 [star] 4495 if star 4496 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4497 ) 4498 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4499 4500 unique_keys = None 4501 if self._match_text_seq("WITH", "UNIQUE"): 4502 unique_keys = True 4503 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4504 unique_keys = False 4505 4506 self._match_text_seq("KEYS") 4507 4508 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4509 self._parse_type() 4510 ) 4511 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4512 4513 return self.expression( 4514 exp.JSONObject, 4515 expressions=expressions, 4516 null_handling=null_handling, 4517 unique_keys=unique_keys, 4518 return_type=return_type, 4519 encoding=encoding, 4520 ) 4521 4522 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4523 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4524 if not self._match_text_seq("NESTED"): 4525 this = self._parse_id_var() 4526 kind = self._parse_types(allow_identifiers=False) 4527 nested = None 4528 else: 4529 this = None 4530 kind = None 4531 nested = True 4532 4533 path = self._match_text_seq("PATH") and self._parse_string() 4534 nested_schema = nested and self._parse_json_schema() 4535 4536 return self.expression( 4537 exp.JSONColumnDef, 4538 this=this, 4539 kind=kind, 4540 path=path, 4541 nested_schema=nested_schema, 4542 ) 4543 4544 def _parse_json_schema(self) -> exp.JSONSchema: 4545 self._match_text_seq("COLUMNS") 4546 return self.expression( 4547 exp.JSONSchema, 4548 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4549 ) 4550 4551 def _parse_json_table(self) -> exp.JSONTable: 4552 this = self._parse_format_json(self._parse_bitwise()) 4553 path = self._match(TokenType.COMMA) and self._parse_string() 4554 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4555 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4556 schema = self._parse_json_schema() 4557 4558 return exp.JSONTable( 4559 this=this, 4560 schema=schema, 4561 path=path, 4562 error_handling=error_handling, 4563 empty_handling=empty_handling, 4564 ) 4565 4566 def _parse_logarithm(self) -> exp.Func: 4567 # Default argument order is base, expression 4568 args = self._parse_csv(self._parse_range) 4569 4570 if len(args) > 1: 4571 if not self.LOG_BASE_FIRST: 4572 args.reverse() 4573 return exp.Log.from_arg_list(args) 4574 4575 return self.expression( 4576 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4577 ) 4578 4579 def _parse_match_against(self) -> exp.MatchAgainst: 4580 expressions = self._parse_csv(self._parse_column) 4581 4582 self._match_text_seq(")", "AGAINST", "(") 4583 4584 this = self._parse_string() 4585 4586 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4587 modifier = "IN NATURAL LANGUAGE MODE" 4588 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4589 modifier = f"{modifier} WITH QUERY EXPANSION" 4590 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4591 modifier = "IN BOOLEAN MODE" 4592 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4593 modifier = "WITH QUERY EXPANSION" 4594 else: 4595 modifier = None 4596 4597 return 
self.expression( 4598 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4599 ) 4600 4601 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4602 def _parse_open_json(self) -> exp.OpenJSON: 4603 this = self._parse_bitwise() 4604 path = self._match(TokenType.COMMA) and self._parse_string() 4605 4606 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4607 this = self._parse_field(any_token=True) 4608 kind = self._parse_types() 4609 path = self._parse_string() 4610 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4611 4612 return self.expression( 4613 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4614 ) 4615 4616 expressions = None 4617 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4618 self._match_l_paren() 4619 expressions = self._parse_csv(_parse_open_json_column_def) 4620 4621 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4622 4623 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4624 args = self._parse_csv(self._parse_bitwise) 4625 4626 if self._match(TokenType.IN): 4627 return self.expression( 4628 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4629 ) 4630 4631 if haystack_first: 4632 haystack = seq_get(args, 0) 4633 needle = seq_get(args, 1) 4634 else: 4635 needle = seq_get(args, 0) 4636 haystack = seq_get(args, 1) 4637 4638 return self.expression( 4639 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4640 ) 4641 4642 def _parse_predict(self) -> exp.Predict: 4643 self._match_text_seq("MODEL") 4644 this = self._parse_table() 4645 4646 self._match(TokenType.COMMA) 4647 self._match_text_seq("TABLE") 4648 4649 return self.expression( 4650 exp.Predict, 4651 this=this, 4652 expression=self._parse_table(), 4653 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4654 ) 4655 4656 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4657 args = self._parse_csv(self._parse_table) 4658 return exp.JoinHint(this=func_name.upper(), expressions=args) 4659 4660 def _parse_substring(self) -> exp.Substring: 4661 # Postgres supports the form: substring(string [from int] [for int]) 4662 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4663 4664 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4665 4666 if self._match(TokenType.FROM): 4667 args.append(self._parse_bitwise()) 4668 if self._match(TokenType.FOR): 4669 args.append(self._parse_bitwise()) 4670 4671 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4672 4673 def _parse_trim(self) -> exp.Trim: 4674 # https://www.w3resource.com/sql/character-functions/trim.php 4675 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4676 4677 position = None 4678 collation = None 4679 expression = None 4680 4681 if self._match_texts(self.TRIM_TYPES): 4682 position = self._prev.text.upper() 4683 4684 this = self._parse_bitwise() 4685 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4686 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4687 expression = self._parse_bitwise() 4688 4689 if invert_order: 4690 this, expression = expression, this 4691 4692 if self._match(TokenType.COLLATE): 4693 collation = self._parse_bitwise() 4694 4695 return self.expression( 4696 exp.Trim, this=this, position=position, expression=expression, collation=collation 4697 ) 4698 4699 def 
_parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4700 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4701 4702 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4703 return self._parse_window(self._parse_id_var(), alias=True) 4704 4705 def _parse_respect_or_ignore_nulls( 4706 self, this: t.Optional[exp.Expression] 4707 ) -> t.Optional[exp.Expression]: 4708 if self._match_text_seq("IGNORE", "NULLS"): 4709 return self.expression(exp.IgnoreNulls, this=this) 4710 if self._match_text_seq("RESPECT", "NULLS"): 4711 return self.expression(exp.RespectNulls, this=this) 4712 return this 4713 4714 def _parse_window( 4715 self, this: t.Optional[exp.Expression], alias: bool = False 4716 ) -> t.Optional[exp.Expression]: 4717 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4718 self._match(TokenType.WHERE) 4719 this = self.expression( 4720 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4721 ) 4722 self._match_r_paren() 4723 4724 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4725 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4726 if self._match_text_seq("WITHIN", "GROUP"): 4727 order = self._parse_wrapped(self._parse_order) 4728 this = self.expression(exp.WithinGroup, this=this, expression=order) 4729 4730 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4731 # Some dialects choose to implement it and some do not. 4732 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4733 4734 # There is some code above in _parse_lambda that handles 4735 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4736 4737 # The code below handles 4738 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4739 4740 # Oracle allows both formats 4741 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4742 # and Snowflake chose to do the same for familiarity 4743 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4744 this = self._parse_respect_or_ignore_nulls(this) 4745 4746 # bigquery select from window x AS (partition by ...) 
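# Illustration (hypothetical, not part of the original source): in alias mode
# this parses the named windows of e.g.
#
#     SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y ORDER BY z)
#
# where _parse_window_clause above consumes the WINDOW keyword and each named
# window goes through _parse_named_window -> _parse_window(..., alias=True).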
4747 if alias: 4748 over = None 4749 self._match(TokenType.ALIAS) 4750 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4751 return this 4752 else: 4753 over = self._prev.text.upper() 4754 4755 if not self._match(TokenType.L_PAREN): 4756 return self.expression( 4757 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4758 ) 4759 4760 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4761 4762 first = self._match(TokenType.FIRST) 4763 if self._match_text_seq("LAST"): 4764 first = False 4765 4766 partition, order = self._parse_partition_and_order() 4767 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4768 4769 if kind: 4770 self._match(TokenType.BETWEEN) 4771 start = self._parse_window_spec() 4772 self._match(TokenType.AND) 4773 end = self._parse_window_spec() 4774 4775 spec = self.expression( 4776 exp.WindowSpec, 4777 kind=kind, 4778 start=start["value"], 4779 start_side=start["side"], 4780 end=end["value"], 4781 end_side=end["side"], 4782 ) 4783 else: 4784 spec = None 4785 4786 self._match_r_paren() 4787 4788 window = self.expression( 4789 exp.Window, 4790 this=this, 4791 partition_by=partition, 4792 order=order, 4793 spec=spec, 4794 alias=window_alias, 4795 over=over, 4796 first=first, 4797 ) 4798 4799 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4800 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4801 return self._parse_window(window, alias=alias) 4802 4803 return window 4804 4805 def _parse_partition_and_order( 4806 self, 4807 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4808 return self._parse_partition_by(), self._parse_order() 4809 4810 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4811 self._match(TokenType.BETWEEN) 4812 4813 return { 4814 "value": ( 4815 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4816 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4817 or self._parse_bitwise() 4818 ), 4819 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4820 } 4821 4822 def _parse_alias( 4823 self, this: t.Optional[exp.Expression], explicit: bool = False 4824 ) -> t.Optional[exp.Expression]: 4825 any_token = self._match(TokenType.ALIAS) 4826 4827 if explicit and not any_token: 4828 return this 4829 4830 if self._match(TokenType.L_PAREN): 4831 aliases = self.expression( 4832 exp.Aliases, 4833 this=this, 4834 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4835 ) 4836 self._match_r_paren(aliases) 4837 return aliases 4838 4839 alias = self._parse_id_var(any_token) 4840 4841 if alias: 4842 return self.expression(exp.Alias, this=this, alias=alias) 4843 4844 return this 4845 4846 def _parse_id_var( 4847 self, 4848 any_token: bool = True, 4849 tokens: t.Optional[t.Collection[TokenType]] = None, 4850 ) -> t.Optional[exp.Expression]: 4851 identifier = self._parse_identifier() 4852 4853 if identifier: 4854 return identifier 4855 4856 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4857 quoted = self._prev.token_type == TokenType.STRING 4858 return exp.Identifier(this=self._prev.text, quoted=quoted) 4859 4860 return None 4861 4862 def _parse_string(self) -> t.Optional[exp.Expression]: 4863 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 4864 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 4865 return self._parse_placeholder() 4866 4867 def _parse_string_as_identifier(self) -> 
t.Optional[exp.Identifier]: 4868 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4869 4870 def _parse_number(self) -> t.Optional[exp.Expression]: 4871 if self._match(TokenType.NUMBER): 4872 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4873 return self._parse_placeholder() 4874 4875 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4876 if self._match(TokenType.IDENTIFIER): 4877 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4878 return self._parse_placeholder() 4879 4880 def _parse_var( 4881 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4882 ) -> t.Optional[exp.Expression]: 4883 if ( 4884 (any_token and self._advance_any()) 4885 or self._match(TokenType.VAR) 4886 or (self._match_set(tokens) if tokens else False) 4887 ): 4888 return self.expression(exp.Var, this=self._prev.text) 4889 return self._parse_placeholder() 4890 4891 def _advance_any(self) -> t.Optional[Token]: 4892 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4893 self._advance() 4894 return self._prev 4895 return None 4896 4897 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4898 return self._parse_var() or self._parse_string() 4899 4900 def _parse_null(self) -> t.Optional[exp.Expression]: 4901 if self._match_set(self.NULL_TOKENS): 4902 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4903 return self._parse_placeholder() 4904 4905 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4906 if self._match(TokenType.TRUE): 4907 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4908 if self._match(TokenType.FALSE): 4909 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4910 return self._parse_placeholder() 4911 4912 def _parse_star(self) -> t.Optional[exp.Expression]: 4913 if self._match(TokenType.STAR): 4914 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4915 return self._parse_placeholder() 4916 4917 def _parse_parameter(self) -> exp.Parameter: 4918 def _parse_parameter_part() -> t.Optional[exp.Expression]: 4919 return ( 4920 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 4921 ) 4922 4923 self._match(TokenType.L_BRACE) 4924 this = _parse_parameter_part() 4925 expression = self._match(TokenType.COLON) and _parse_parameter_part() 4926 self._match(TokenType.R_BRACE) 4927 4928 return self.expression(exp.Parameter, this=this, expression=expression) 4929 4930 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4931 if self._match_set(self.PLACEHOLDER_PARSERS): 4932 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4933 if placeholder: 4934 return placeholder 4935 self._advance(-1) 4936 return None 4937 4938 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4939 if not self._match(TokenType.EXCEPT): 4940 return None 4941 if self._match(TokenType.L_PAREN, advance=False): 4942 return self._parse_wrapped_csv(self._parse_column) 4943 4944 except_column = self._parse_column() 4945 return [except_column] if except_column else None 4946 4947 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4948 if not self._match(TokenType.REPLACE): 4949 return None 4950 if self._match(TokenType.L_PAREN, advance=False): 4951 return self._parse_wrapped_csv(self._parse_expression) 4952 4953 replace_expression = self._parse_expression() 4954 return [replace_expression] if replace_expression else None 4955 4956 def _parse_csv( 4957 self, parse_method: 
t.Callable, sep: TokenType = TokenType.COMMA 4958 ) -> t.List[exp.Expression]: 4959 parse_result = parse_method() 4960 items = [parse_result] if parse_result is not None else [] 4961 4962 while self._match(sep): 4963 self._add_comments(parse_result) 4964 parse_result = parse_method() 4965 if parse_result is not None: 4966 items.append(parse_result) 4967 4968 return items 4969 4970 def _parse_tokens( 4971 self, parse_method: t.Callable, expressions: t.Dict 4972 ) -> t.Optional[exp.Expression]: 4973 this = parse_method() 4974 4975 while self._match_set(expressions): 4976 this = self.expression( 4977 expressions[self._prev.token_type], 4978 this=this, 4979 comments=self._prev_comments, 4980 expression=parse_method(), 4981 ) 4982 4983 return this 4984 4985 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4986 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4987 4988 def _parse_wrapped_csv( 4989 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4990 ) -> t.List[exp.Expression]: 4991 return self._parse_wrapped( 4992 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4993 ) 4994 4995 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4996 wrapped = self._match(TokenType.L_PAREN) 4997 if not wrapped and not optional: 4998 self.raise_error("Expecting (") 4999 parse_result = parse_method() 5000 if wrapped: 5001 self._match_r_paren() 5002 return parse_result 5003 5004 def _parse_expressions(self) -> t.List[exp.Expression]: 5005 return self._parse_csv(self._parse_expression) 5006 5007 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5008 return self._parse_select() or self._parse_set_operations( 5009 self._parse_expression() if alias else self._parse_conjunction() 5010 ) 5011 5012 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5013 return self._parse_query_modifiers( 5014 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5015 ) 5016 5017 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5018 this = None 5019 if self._match_texts(self.TRANSACTION_KIND): 5020 this = self._prev.text 5021 5022 self._match_texts(("TRANSACTION", "WORK")) 5023 5024 modes = [] 5025 while True: 5026 mode = [] 5027 while self._match(TokenType.VAR): 5028 mode.append(self._prev.text) 5029 5030 if mode: 5031 modes.append(" ".join(mode)) 5032 if not self._match(TokenType.COMMA): 5033 break 5034 5035 return self.expression(exp.Transaction, this=this, modes=modes) 5036 5037 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5038 chain = None 5039 savepoint = None 5040 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5041 5042 self._match_texts(("TRANSACTION", "WORK")) 5043 5044 if self._match_text_seq("TO"): 5045 self._match_text_seq("SAVEPOINT") 5046 savepoint = self._parse_id_var() 5047 5048 if self._match(TokenType.AND): 5049 chain = not self._match_text_seq("NO") 5050 self._match_text_seq("CHAIN") 5051 5052 if is_rollback: 5053 return self.expression(exp.Rollback, savepoint=savepoint) 5054 5055 return self.expression(exp.Commit, chain=chain) 5056 5057 def _parse_refresh(self) -> exp.Refresh: 5058 self._match(TokenType.TABLE) 5059 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5060 5061 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5062 if not self._match_text_seq("ADD"): 5063 return None 5064 5065 
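# Illustration (hypothetical, not part of the original source): the rest of
# this method parses clauses such as
#     ADD COLUMN IF NOT EXISTS c INT AFTER b
# where IF NOT EXISTS is captured by _parse_exists(not_=True) and a trailing
# FIRST/AFTER position (MySQL, Databricks) becomes an exp.ColumnPosition.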
        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
        return [
            exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
            for value in values
            if value
        ]
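The _match* helpers above advance the token cursor only on a successful match, and _retreat rewinds it, which is what lets the _parse_* methods probe ahead speculatively. A minimal usage sketch of the public entry point built on top of them (an illustrative example, not part of the module source; it uses only the Tokenizer and Parser classes documented here, with a made-up SQL string):

    # Hedged usage sketch: tokenize a statement, then parse it with the base Parser.
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "ALTER TABLE t ADD COLUMN c INT"
    tokens = Tokenizer().tokenize(sql)     # produce the token list the parser consumes
    tree = Parser().parse(tokens, sql)[0]  # parse() returns one syntax tree per statement
    print(repr(tree))                      # an exp.AlterTable built by _parse_alter above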
21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMP_S, 165 TokenType.TIMESTAMP_MS, 166 TokenType.TIMESTAMP_NS, 167 TokenType.TIMESTAMPTZ, 168 TokenType.TIMESTAMPLTZ, 169 TokenType.DATETIME, 170 TokenType.DATETIME64, 171 TokenType.DATE, 172 TokenType.INT4RANGE, 173 TokenType.INT4MULTIRANGE, 174 TokenType.INT8RANGE, 175 TokenType.INT8MULTIRANGE, 176 TokenType.NUMRANGE, 177 TokenType.NUMMULTIRANGE, 178 TokenType.TSRANGE, 179 TokenType.TSMULTIRANGE, 180 TokenType.TSTZRANGE, 181 TokenType.TSTZMULTIRANGE, 182 TokenType.DATERANGE, 183 TokenType.DATEMULTIRANGE, 184 TokenType.DECIMAL, 185 TokenType.UDECIMAL, 186 TokenType.BIGDECIMAL, 187 TokenType.UUID, 188 TokenType.GEOGRAPHY, 189 TokenType.GEOMETRY, 190 TokenType.HLLSKETCH, 191 TokenType.HSTORE, 192 
TokenType.PSEUDO_TYPE, 193 TokenType.SUPER, 194 TokenType.SERIAL, 195 TokenType.SMALLSERIAL, 196 TokenType.BIGSERIAL, 197 TokenType.XML, 198 TokenType.YEAR, 199 TokenType.UNIQUEIDENTIFIER, 200 TokenType.USERDEFINED, 201 TokenType.MONEY, 202 TokenType.SMALLMONEY, 203 TokenType.ROWVERSION, 204 TokenType.IMAGE, 205 TokenType.VARIANT, 206 TokenType.OBJECT, 207 TokenType.OBJECT_IDENTIFIER, 208 TokenType.INET, 209 TokenType.IPADDRESS, 210 TokenType.IPPREFIX, 211 TokenType.UNKNOWN, 212 TokenType.NULL, 213 *ENUM_TYPE_TOKENS, 214 *NESTED_TYPE_TOKENS, 215 } 216 217 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 218 TokenType.BIGINT: TokenType.UBIGINT, 219 TokenType.INT: TokenType.UINT, 220 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 221 TokenType.SMALLINT: TokenType.USMALLINT, 222 TokenType.TINYINT: TokenType.UTINYINT, 223 TokenType.DECIMAL: TokenType.UDECIMAL, 224 } 225 226 SUBQUERY_PREDICATES = { 227 TokenType.ANY: exp.Any, 228 TokenType.ALL: exp.All, 229 TokenType.EXISTS: exp.Exists, 230 TokenType.SOME: exp.Any, 231 } 232 233 RESERVED_KEYWORDS = { 234 *Tokenizer.SINGLE_TOKENS.values(), 235 TokenType.SELECT, 236 } 237 238 DB_CREATABLES = { 239 TokenType.DATABASE, 240 TokenType.SCHEMA, 241 TokenType.TABLE, 242 TokenType.VIEW, 243 TokenType.MODEL, 244 TokenType.DICTIONARY, 245 } 246 247 CREATABLES = { 248 TokenType.COLUMN, 249 TokenType.CONSTRAINT, 250 TokenType.FUNCTION, 251 TokenType.INDEX, 252 TokenType.PROCEDURE, 253 TokenType.FOREIGN_KEY, 254 *DB_CREATABLES, 255 } 256 257 # Tokens that can represent identifiers 258 ID_VAR_TOKENS = { 259 TokenType.VAR, 260 TokenType.ANTI, 261 TokenType.APPLY, 262 TokenType.ASC, 263 TokenType.AUTO_INCREMENT, 264 TokenType.BEGIN, 265 TokenType.CACHE, 266 TokenType.CASE, 267 TokenType.COLLATE, 268 TokenType.COMMAND, 269 TokenType.COMMENT, 270 TokenType.COMMIT, 271 TokenType.CONSTRAINT, 272 TokenType.DEFAULT, 273 TokenType.DELETE, 274 TokenType.DESC, 275 TokenType.DESCRIBE, 276 TokenType.DICTIONARY, 277 TokenType.DIV, 278 TokenType.END, 279 TokenType.EXECUTE, 280 TokenType.ESCAPE, 281 TokenType.FALSE, 282 TokenType.FIRST, 283 TokenType.FILTER, 284 TokenType.FORMAT, 285 TokenType.FULL, 286 TokenType.IS, 287 TokenType.ISNULL, 288 TokenType.INTERVAL, 289 TokenType.KEEP, 290 TokenType.KILL, 291 TokenType.LEFT, 292 TokenType.LOAD, 293 TokenType.MERGE, 294 TokenType.NATURAL, 295 TokenType.NEXT, 296 TokenType.OFFSET, 297 TokenType.OPERATOR, 298 TokenType.ORDINALITY, 299 TokenType.OVERLAPS, 300 TokenType.OVERWRITE, 301 TokenType.PARTITION, 302 TokenType.PERCENT, 303 TokenType.PIVOT, 304 TokenType.PRAGMA, 305 TokenType.RANGE, 306 TokenType.RECURSIVE, 307 TokenType.REFERENCES, 308 TokenType.REFRESH, 309 TokenType.RIGHT, 310 TokenType.ROW, 311 TokenType.ROWS, 312 TokenType.SEMI, 313 TokenType.SET, 314 TokenType.SETTINGS, 315 TokenType.SHOW, 316 TokenType.TEMPORARY, 317 TokenType.TOP, 318 TokenType.TRUE, 319 TokenType.UNIQUE, 320 TokenType.UNPIVOT, 321 TokenType.UPDATE, 322 TokenType.USE, 323 TokenType.VOLATILE, 324 TokenType.WINDOW, 325 *CREATABLES, 326 *SUBQUERY_PREDICATES, 327 *TYPE_TOKENS, 328 *NO_PAREN_FUNCTIONS, 329 } 330 331 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 332 333 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 334 TokenType.ANTI, 335 TokenType.APPLY, 336 TokenType.ASOF, 337 TokenType.FULL, 338 TokenType.LEFT, 339 TokenType.LOCK, 340 TokenType.NATURAL, 341 TokenType.OFFSET, 342 TokenType.RIGHT, 343 TokenType.SEMI, 344 TokenType.WINDOW, 345 } 346 347 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 348 349 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - 
{TokenType.SET} 350 351 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 352 353 FUNC_TOKENS = { 354 TokenType.COLLATE, 355 TokenType.COMMAND, 356 TokenType.CURRENT_DATE, 357 TokenType.CURRENT_DATETIME, 358 TokenType.CURRENT_TIMESTAMP, 359 TokenType.CURRENT_TIME, 360 TokenType.CURRENT_USER, 361 TokenType.FILTER, 362 TokenType.FIRST, 363 TokenType.FORMAT, 364 TokenType.GLOB, 365 TokenType.IDENTIFIER, 366 TokenType.INDEX, 367 TokenType.ISNULL, 368 TokenType.ILIKE, 369 TokenType.INSERT, 370 TokenType.LIKE, 371 TokenType.MERGE, 372 TokenType.OFFSET, 373 TokenType.PRIMARY_KEY, 374 TokenType.RANGE, 375 TokenType.REPLACE, 376 TokenType.RLIKE, 377 TokenType.ROW, 378 TokenType.UNNEST, 379 TokenType.VAR, 380 TokenType.LEFT, 381 TokenType.RIGHT, 382 TokenType.DATE, 383 TokenType.DATETIME, 384 TokenType.TABLE, 385 TokenType.TIMESTAMP, 386 TokenType.TIMESTAMPTZ, 387 TokenType.WINDOW, 388 TokenType.XOR, 389 *TYPE_TOKENS, 390 *SUBQUERY_PREDICATES, 391 } 392 393 CONJUNCTION = { 394 TokenType.AND: exp.And, 395 TokenType.OR: exp.Or, 396 } 397 398 EQUALITY = { 399 TokenType.COLON_EQ: exp.PropertyEQ, 400 TokenType.EQ: exp.EQ, 401 TokenType.NEQ: exp.NEQ, 402 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 403 } 404 405 COMPARISON = { 406 TokenType.GT: exp.GT, 407 TokenType.GTE: exp.GTE, 408 TokenType.LT: exp.LT, 409 TokenType.LTE: exp.LTE, 410 } 411 412 BITWISE = { 413 TokenType.AMP: exp.BitwiseAnd, 414 TokenType.CARET: exp.BitwiseXor, 415 TokenType.PIPE: exp.BitwiseOr, 416 TokenType.DPIPE: exp.DPipe, 417 } 418 419 TERM = { 420 TokenType.DASH: exp.Sub, 421 TokenType.PLUS: exp.Add, 422 TokenType.MOD: exp.Mod, 423 TokenType.COLLATE: exp.Collate, 424 } 425 426 FACTOR = { 427 TokenType.DIV: exp.IntDiv, 428 TokenType.LR_ARROW: exp.Distance, 429 TokenType.SLASH: exp.Div, 430 TokenType.STAR: exp.Mul, 431 } 432 433 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 434 435 TIMES = { 436 TokenType.TIME, 437 TokenType.TIMETZ, 438 } 439 440 TIMESTAMPS = { 441 TokenType.TIMESTAMP, 442 TokenType.TIMESTAMPTZ, 443 TokenType.TIMESTAMPLTZ, 444 *TIMES, 445 } 446 447 SET_OPERATIONS = { 448 TokenType.UNION, 449 TokenType.INTERSECT, 450 TokenType.EXCEPT, 451 } 452 453 JOIN_METHODS = { 454 TokenType.NATURAL, 455 TokenType.ASOF, 456 } 457 458 JOIN_SIDES = { 459 TokenType.LEFT, 460 TokenType.RIGHT, 461 TokenType.FULL, 462 } 463 464 JOIN_KINDS = { 465 TokenType.INNER, 466 TokenType.OUTER, 467 TokenType.CROSS, 468 TokenType.SEMI, 469 TokenType.ANTI, 470 } 471 472 JOIN_HINTS: t.Set[str] = set() 473 474 LAMBDAS = { 475 TokenType.ARROW: lambda self, expressions: self.expression( 476 exp.Lambda, 477 this=self._replace_lambda( 478 self._parse_conjunction(), 479 {node.name for node in expressions}, 480 ), 481 expressions=expressions, 482 ), 483 TokenType.FARROW: lambda self, expressions: self.expression( 484 exp.Kwarg, 485 this=exp.var(expressions[0].name), 486 expression=self._parse_conjunction(), 487 ), 488 } 489 490 COLUMN_OPERATORS = { 491 TokenType.DOT: None, 492 TokenType.DCOLON: lambda self, this, to: self.expression( 493 exp.Cast if self.STRICT_CAST else exp.TryCast, 494 this=this, 495 to=to, 496 ), 497 TokenType.ARROW: lambda self, this, path: self.expression( 498 exp.JSONExtract, 499 this=this, 500 expression=path, 501 ), 502 TokenType.DARROW: lambda self, this, path: self.expression( 503 exp.JSONExtractScalar, 504 this=this, 505 expression=path, 506 ), 507 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 508 exp.JSONBExtract, 509 this=this, 510 expression=path, 511 ), 512 TokenType.DHASH_ARROW: lambda self, this, path: 
self.expression( 513 exp.JSONBExtractScalar, 514 this=this, 515 expression=path, 516 ), 517 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 518 exp.JSONBContains, 519 this=this, 520 expression=key, 521 ), 522 } 523 524 EXPRESSION_PARSERS = { 525 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 526 exp.Column: lambda self: self._parse_column(), 527 exp.Condition: lambda self: self._parse_conjunction(), 528 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 529 exp.Expression: lambda self: self._parse_statement(), 530 exp.From: lambda self: self._parse_from(), 531 exp.Group: lambda self: self._parse_group(), 532 exp.Having: lambda self: self._parse_having(), 533 exp.Identifier: lambda self: self._parse_id_var(), 534 exp.Join: lambda self: self._parse_join(), 535 exp.Lambda: lambda self: self._parse_lambda(), 536 exp.Lateral: lambda self: self._parse_lateral(), 537 exp.Limit: lambda self: self._parse_limit(), 538 exp.Offset: lambda self: self._parse_offset(), 539 exp.Order: lambda self: self._parse_order(), 540 exp.Ordered: lambda self: self._parse_ordered(), 541 exp.Properties: lambda self: self._parse_properties(), 542 exp.Qualify: lambda self: self._parse_qualify(), 543 exp.Returning: lambda self: self._parse_returning(), 544 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 545 exp.Table: lambda self: self._parse_table_parts(), 546 exp.TableAlias: lambda self: self._parse_table_alias(), 547 exp.Where: lambda self: self._parse_where(), 548 exp.Window: lambda self: self._parse_named_window(), 549 exp.With: lambda self: self._parse_with(), 550 "JOIN_TYPE": lambda self: self._parse_join_parts(), 551 } 552 553 STATEMENT_PARSERS = { 554 TokenType.ALTER: lambda self: self._parse_alter(), 555 TokenType.BEGIN: lambda self: self._parse_transaction(), 556 TokenType.CACHE: lambda self: self._parse_cache(), 557 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 558 TokenType.COMMENT: lambda self: self._parse_comment(), 559 TokenType.CREATE: lambda self: self._parse_create(), 560 TokenType.DELETE: lambda self: self._parse_delete(), 561 TokenType.DESC: lambda self: self._parse_describe(), 562 TokenType.DESCRIBE: lambda self: self._parse_describe(), 563 TokenType.DROP: lambda self: self._parse_drop(), 564 TokenType.INSERT: lambda self: self._parse_insert(), 565 TokenType.KILL: lambda self: self._parse_kill(), 566 TokenType.LOAD: lambda self: self._parse_load(), 567 TokenType.MERGE: lambda self: self._parse_merge(), 568 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 569 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 570 TokenType.REFRESH: lambda self: self._parse_refresh(), 571 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 572 TokenType.SET: lambda self: self._parse_set(), 573 TokenType.UNCACHE: lambda self: self._parse_uncache(), 574 TokenType.UPDATE: lambda self: self._parse_update(), 575 TokenType.USE: lambda self: self.expression( 576 exp.Use, 577 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 578 and exp.var(self._prev.text), 579 this=self._parse_table(schema=False), 580 ), 581 } 582 583 UNARY_PARSERS = { 584 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 585 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 586 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 587 TokenType.DASH: lambda self: 
self.expression(exp.Neg, this=self._parse_unary()), 588 } 589 590 PRIMARY_PARSERS = { 591 TokenType.STRING: lambda self, token: self.expression( 592 exp.Literal, this=token.text, is_string=True 593 ), 594 TokenType.NUMBER: lambda self, token: self.expression( 595 exp.Literal, this=token.text, is_string=False 596 ), 597 TokenType.STAR: lambda self, _: self.expression( 598 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 599 ), 600 TokenType.NULL: lambda self, _: self.expression(exp.Null), 601 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 602 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 603 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 604 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 605 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 606 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 607 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 608 exp.National, this=token.text 609 ), 610 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 611 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 612 exp.RawString, this=token.text 613 ), 614 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 615 } 616 617 PLACEHOLDER_PARSERS = { 618 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 619 TokenType.PARAMETER: lambda self: self._parse_parameter(), 620 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 621 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 622 else None, 623 } 624 625 RANGE_PARSERS = { 626 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 627 TokenType.GLOB: binary_range_parser(exp.Glob), 628 TokenType.ILIKE: binary_range_parser(exp.ILike), 629 TokenType.IN: lambda self, this: self._parse_in(this), 630 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 631 TokenType.IS: lambda self, this: self._parse_is(this), 632 TokenType.LIKE: binary_range_parser(exp.Like), 633 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 634 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 635 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 636 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 637 } 638 639 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 640 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 641 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 642 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 643 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 644 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 645 "CHECKSUM": lambda self: self._parse_checksum(), 646 "CLUSTER BY": lambda self: self._parse_cluster(), 647 "CLUSTERED": lambda self: self._parse_clustered_by(), 648 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 649 exp.CollateProperty, **kwargs 650 ), 651 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 652 "COPY": lambda self: self._parse_copy_property(), 653 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 654 "DEFINER": lambda self: self._parse_definer(), 655 "DETERMINISTIC": lambda self: self.expression( 656 
exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 657 ), 658 "DISTKEY": lambda self: self._parse_distkey(), 659 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 660 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 661 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 662 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 663 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 664 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 665 "FREESPACE": lambda self: self._parse_freespace(), 666 "HEAP": lambda self: self.expression(exp.HeapProperty), 667 "IMMUTABLE": lambda self: self.expression( 668 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 669 ), 670 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 671 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 672 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 673 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 674 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 675 "LIKE": lambda self: self._parse_create_like(), 676 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 677 "LOCK": lambda self: self._parse_locking(), 678 "LOCKING": lambda self: self._parse_locking(), 679 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 680 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 681 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 682 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 683 "NO": lambda self: self._parse_no_property(), 684 "ON": lambda self: self._parse_on_property(), 685 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 686 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 687 "PARTITION": lambda self: self._parse_partitioned_of(), 688 "PARTITION BY": lambda self: self._parse_partitioned_by(), 689 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 690 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 691 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 692 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 693 "REMOTE": lambda self: self._parse_remote_with_connection(), 694 "RETURNS": lambda self: self._parse_returns(), 695 "ROW": lambda self: self._parse_row(), 696 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 697 "SAMPLE": lambda self: self.expression( 698 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 699 ), 700 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 701 "SETTINGS": lambda self: self.expression( 702 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 703 ), 704 "SORTKEY": lambda self: self._parse_sortkey(), 705 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 706 "STABLE": lambda self: self.expression( 707 exp.StabilityProperty, this=exp.Literal.string("STABLE") 708 ), 709 "STORED": lambda self: self._parse_stored(), 710 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 711 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 712 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 713 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 
714 "TO": lambda self: self._parse_to_table(), 715 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 716 "TRANSFORM": lambda self: self.expression( 717 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 718 ), 719 "TTL": lambda self: self._parse_ttl(), 720 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 721 "VOLATILE": lambda self: self._parse_volatile_property(), 722 "WITH": lambda self: self._parse_with_property(), 723 } 724 725 CONSTRAINT_PARSERS = { 726 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 727 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 728 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 729 "CHARACTER SET": lambda self: self.expression( 730 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 731 ), 732 "CHECK": lambda self: self.expression( 733 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 734 ), 735 "COLLATE": lambda self: self.expression( 736 exp.CollateColumnConstraint, this=self._parse_var() 737 ), 738 "COMMENT": lambda self: self.expression( 739 exp.CommentColumnConstraint, this=self._parse_string() 740 ), 741 "COMPRESS": lambda self: self._parse_compress(), 742 "CLUSTERED": lambda self: self.expression( 743 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 744 ), 745 "NONCLUSTERED": lambda self: self.expression( 746 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 747 ), 748 "DEFAULT": lambda self: self.expression( 749 exp.DefaultColumnConstraint, this=self._parse_bitwise() 750 ), 751 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 752 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 753 "FORMAT": lambda self: self.expression( 754 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 755 ), 756 "GENERATED": lambda self: self._parse_generated_as_identity(), 757 "IDENTITY": lambda self: self._parse_auto_increment(), 758 "INLINE": lambda self: self._parse_inline(), 759 "LIKE": lambda self: self._parse_create_like(), 760 "NOT": lambda self: self._parse_not_constraint(), 761 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 762 "ON": lambda self: ( 763 self._match(TokenType.UPDATE) 764 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 765 ) 766 or self.expression(exp.OnProperty, this=self._parse_id_var()), 767 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 768 "PERIOD": lambda self: self._parse_period_for_system_time(), 769 "PRIMARY KEY": lambda self: self._parse_primary_key(), 770 "REFERENCES": lambda self: self._parse_references(match=False), 771 "TITLE": lambda self: self.expression( 772 exp.TitleColumnConstraint, this=self._parse_var_or_string() 773 ), 774 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 775 "UNIQUE": lambda self: self._parse_unique(), 776 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 777 "WITH": lambda self: self.expression( 778 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 779 ), 780 } 781 782 ALTER_PARSERS = { 783 "ADD": lambda self: self._parse_alter_table_add(), 784 "ALTER": lambda self: self._parse_alter_table_alter(), 785 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 786 "DROP": lambda 
self: self._parse_alter_table_drop(), 787 "RENAME": lambda self: self._parse_alter_table_rename(), 788 } 789 790 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 791 792 NO_PAREN_FUNCTION_PARSERS = { 793 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 794 "CASE": lambda self: self._parse_case(), 795 "IF": lambda self: self._parse_if(), 796 "NEXT": lambda self: self._parse_next_value_for(), 797 } 798 799 INVALID_FUNC_NAME_TOKENS = { 800 TokenType.IDENTIFIER, 801 TokenType.STRING, 802 } 803 804 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 805 806 FUNCTION_PARSERS = { 807 "ANY_VALUE": lambda self: self._parse_any_value(), 808 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 809 "CONCAT": lambda self: self._parse_concat(), 810 "CONCAT_WS": lambda self: self._parse_concat_ws(), 811 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 812 "DECODE": lambda self: self._parse_decode(), 813 "EXTRACT": lambda self: self._parse_extract(), 814 "JSON_OBJECT": lambda self: self._parse_json_object(), 815 "JSON_TABLE": lambda self: self._parse_json_table(), 816 "LOG": lambda self: self._parse_logarithm(), 817 "MATCH": lambda self: self._parse_match_against(), 818 "OPENJSON": lambda self: self._parse_open_json(), 819 "POSITION": lambda self: self._parse_position(), 820 "PREDICT": lambda self: self._parse_predict(), 821 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 822 "STRING_AGG": lambda self: self._parse_string_agg(), 823 "SUBSTRING": lambda self: self._parse_substring(), 824 "TRIM": lambda self: self._parse_trim(), 825 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 826 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 827 } 828 829 QUERY_MODIFIER_PARSERS = { 830 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 831 TokenType.WHERE: lambda self: ("where", self._parse_where()), 832 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 833 TokenType.HAVING: lambda self: ("having", self._parse_having()), 834 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 835 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 836 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 837 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 838 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 839 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 840 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 841 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 842 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 843 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 844 TokenType.CLUSTER_BY: lambda self: ( 845 "cluster", 846 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 847 ), 848 TokenType.DISTRIBUTE_BY: lambda self: ( 849 "distribute", 850 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 851 ), 852 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 853 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 854 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 855 } 856 857 SET_PARSERS = { 858 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 859 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 860 "SESSION": lambda self: 
self._parse_set_item_assignment("SESSION"), 861 "TRANSACTION": lambda self: self._parse_set_transaction(), 862 } 863 864 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 865 866 TYPE_LITERAL_PARSERS = { 867 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 868 } 869 870 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 871 872 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 873 874 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 875 876 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 877 TRANSACTION_CHARACTERISTICS = { 878 "ISOLATION LEVEL REPEATABLE READ", 879 "ISOLATION LEVEL READ COMMITTED", 880 "ISOLATION LEVEL READ UNCOMMITTED", 881 "ISOLATION LEVEL SERIALIZABLE", 882 "READ WRITE", 883 "READ ONLY", 884 } 885 886 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 887 888 CLONE_KEYWORDS = {"CLONE", "COPY"} 889 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 890 891 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 892 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 893 894 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 895 896 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 897 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 898 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 899 900 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 901 902 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 903 904 DISTINCT_TOKENS = {TokenType.DISTINCT} 905 906 NULL_TOKENS = {TokenType.NULL} 907 908 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 909 910 STRICT_CAST = True 911 912 # A NULL arg in CONCAT yields NULL by default 913 CONCAT_NULL_OUTPUTS_STRING = False 914 915 PREFIXED_PIVOT_COLUMNS = False 916 IDENTIFY_PIVOT_STRINGS = False 917 918 LOG_BASE_FIRST = True 919 LOG_DEFAULTS_TO_LN = False 920 921 # Whether or not ADD is present for each column added by ALTER TABLE 922 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 923 924 # Whether or not the table sample clause expects CSV syntax 925 TABLESAMPLE_CSV = False 926 927 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 928 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 929 930 # Whether the TRIM function expects the characters to trim as its first argument 931 TRIM_PATTERN_FIRST = False 932 933 # Whether the behavior of a / b depends on the types of a and b. 934 # False means a / b is always float division. 935 # True means a / b is integer division if both a and b are integers. 936 TYPED_DIVISION = False 937 938 # False means 1 / 0 throws an error. 939 # True means 1 / 0 returns null. 
940 SAFE_DIVISION = False 941 942 __slots__ = ( 943 "error_level", 944 "error_message_context", 945 "max_errors", 946 "sql", 947 "errors", 948 "_tokens", 949 "_index", 950 "_curr", 951 "_next", 952 "_prev", 953 "_prev_comments", 954 "_tokenizer", 955 ) 956 957 # Autofilled 958 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 959 INDEX_OFFSET: int = 0 960 UNNEST_COLUMN_ONLY: bool = False 961 ALIAS_POST_TABLESAMPLE: bool = False 962 STRICT_STRING_CONCAT = False 963 SUPPORTS_USER_DEFINED_TYPES = True 964 NORMALIZE_FUNCTIONS = "upper" 965 NULL_ORDERING: str = "nulls_are_small" 966 SHOW_TRIE: t.Dict = {} 967 SET_TRIE: t.Dict = {} 968 FORMAT_MAPPING: t.Dict[str, str] = {} 969 FORMAT_TRIE: t.Dict = {} 970 TIME_MAPPING: t.Dict[str, str] = {} 971 TIME_TRIE: t.Dict = {} 972 973 def __init__( 974 self, 975 error_level: t.Optional[ErrorLevel] = None, 976 error_message_context: int = 100, 977 max_errors: int = 3, 978 ): 979 self.error_level = error_level or ErrorLevel.IMMEDIATE 980 self.error_message_context = error_message_context 981 self.max_errors = max_errors 982 self._tokenizer = self.TOKENIZER_CLASS() 983 self.reset() 984 985 def reset(self): 986 self.sql = "" 987 self.errors = [] 988 self._tokens = [] 989 self._index = 0 990 self._curr = None 991 self._next = None 992 self._prev = None 993 self._prev_comments = None 994 995 def parse( 996 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 997 ) -> t.List[t.Optional[exp.Expression]]: 998 """ 999 Parses a list of tokens and returns a list of syntax trees, one tree 1000 per parsed SQL statement. 1001 1002 Args: 1003 raw_tokens: The list of tokens. 1004 sql: The original SQL string, used to produce helpful debug messages. 1005 1006 Returns: 1007 The list of the produced syntax trees. 1008 """ 1009 return self._parse( 1010 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1011 ) 1012 1013 def parse_into( 1014 self, 1015 expression_types: exp.IntoType, 1016 raw_tokens: t.List[Token], 1017 sql: t.Optional[str] = None, 1018 ) -> t.List[t.Optional[exp.Expression]]: 1019 """ 1020 Parses a list of tokens into a given Expression type. If a collection of Expression 1021 types is given instead, this method will try to parse the token list into each one 1022 of them, stopping at the first for which the parsing succeeds. 1023 1024 Args: 1025 expression_types: The expression type(s) to try and parse the token list into. 1026 raw_tokens: The list of tokens. 1027 sql: The original SQL string, used to produce helpful debug messages. 1028 1029 Returns: 1030 The target Expression. 
1031 """ 1032 errors = [] 1033 for expression_type in ensure_list(expression_types): 1034 parser = self.EXPRESSION_PARSERS.get(expression_type) 1035 if not parser: 1036 raise TypeError(f"No parser registered for {expression_type}") 1037 1038 try: 1039 return self._parse(parser, raw_tokens, sql) 1040 except ParseError as e: 1041 e.errors[0]["into_expression"] = expression_type 1042 errors.append(e) 1043 1044 raise ParseError( 1045 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1046 errors=merge_errors(errors), 1047 ) from errors[-1] 1048 1049 def _parse( 1050 self, 1051 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1052 raw_tokens: t.List[Token], 1053 sql: t.Optional[str] = None, 1054 ) -> t.List[t.Optional[exp.Expression]]: 1055 self.reset() 1056 self.sql = sql or "" 1057 1058 total = len(raw_tokens) 1059 chunks: t.List[t.List[Token]] = [[]] 1060 1061 for i, token in enumerate(raw_tokens): 1062 if token.token_type == TokenType.SEMICOLON: 1063 if i < total - 1: 1064 chunks.append([]) 1065 else: 1066 chunks[-1].append(token) 1067 1068 expressions = [] 1069 1070 for tokens in chunks: 1071 self._index = -1 1072 self._tokens = tokens 1073 self._advance() 1074 1075 expressions.append(parse_method(self)) 1076 1077 if self._index < len(self._tokens): 1078 self.raise_error("Invalid expression / Unexpected token") 1079 1080 self.check_errors() 1081 1082 return expressions 1083 1084 def check_errors(self) -> None: 1085 """Logs or raises any found errors, depending on the chosen error level setting.""" 1086 if self.error_level == ErrorLevel.WARN: 1087 for error in self.errors: 1088 logger.error(str(error)) 1089 elif self.error_level == ErrorLevel.RAISE and self.errors: 1090 raise ParseError( 1091 concat_messages(self.errors, self.max_errors), 1092 errors=merge_errors(self.errors), 1093 ) 1094 1095 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1096 """ 1097 Appends an error in the list of recorded errors or raises it, depending on the chosen 1098 error level setting. 1099 """ 1100 token = token or self._curr or self._prev or Token.string("") 1101 start = token.start 1102 end = token.end + 1 1103 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1104 highlight = self.sql[start:end] 1105 end_context = self.sql[end : end + self.error_message_context] 1106 1107 error = ParseError.new( 1108 f"{message}. Line {token.line}, Col: {token.col}.\n" 1109 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1110 description=message, 1111 line=token.line, 1112 col=token.col, 1113 start_context=start_context, 1114 highlight=highlight, 1115 end_context=end_context, 1116 ) 1117 1118 if self.error_level == ErrorLevel.IMMEDIATE: 1119 raise error 1120 1121 self.errors.append(error) 1122 1123 def expression( 1124 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1125 ) -> E: 1126 """ 1127 Creates a new, validated Expression. 1128 1129 Args: 1130 exp_class: The expression class to instantiate. 1131 comments: An optional list of comments to attach to the expression. 1132 kwargs: The arguments to set for the expression along with their respective values. 1133 1134 Returns: 1135 The target expression. 
1136 """ 1137 instance = exp_class(**kwargs) 1138 instance.add_comments(comments) if comments else self._add_comments(instance) 1139 return self.validate_expression(instance) 1140 1141 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1142 if expression and self._prev_comments: 1143 expression.add_comments(self._prev_comments) 1144 self._prev_comments = None 1145 1146 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1147 """ 1148 Validates an Expression, making sure that all its mandatory arguments are set. 1149 1150 Args: 1151 expression: The expression to validate. 1152 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1153 1154 Returns: 1155 The validated expression. 1156 """ 1157 if self.error_level != ErrorLevel.IGNORE: 1158 for error_message in expression.error_messages(args): 1159 self.raise_error(error_message) 1160 1161 return expression 1162 1163 def _find_sql(self, start: Token, end: Token) -> str: 1164 return self.sql[start.start : end.end + 1] 1165 1166 def _advance(self, times: int = 1) -> None: 1167 self._index += times 1168 self._curr = seq_get(self._tokens, self._index) 1169 self._next = seq_get(self._tokens, self._index + 1) 1170 1171 if self._index > 0: 1172 self._prev = self._tokens[self._index - 1] 1173 self._prev_comments = self._prev.comments 1174 else: 1175 self._prev = None 1176 self._prev_comments = None 1177 1178 def _retreat(self, index: int) -> None: 1179 if index != self._index: 1180 self._advance(index - self._index) 1181 1182 def _parse_command(self) -> exp.Command: 1183 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1184 1185 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1186 start = self._prev 1187 exists = self._parse_exists() if allow_exists else None 1188 1189 self._match(TokenType.ON) 1190 1191 kind = self._match_set(self.CREATABLES) and self._prev 1192 if not kind: 1193 return self._parse_as_command(start) 1194 1195 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1196 this = self._parse_user_defined_function(kind=kind.token_type) 1197 elif kind.token_type == TokenType.TABLE: 1198 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1199 elif kind.token_type == TokenType.COLUMN: 1200 this = self._parse_column() 1201 else: 1202 this = self._parse_id_var() 1203 1204 self._match(TokenType.IS) 1205 1206 return self.expression( 1207 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1208 ) 1209 1210 def _parse_to_table( 1211 self, 1212 ) -> exp.ToTableProperty: 1213 table = self._parse_table_parts(schema=True) 1214 return self.expression(exp.ToTableProperty, this=table) 1215 1216 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1217 def _parse_ttl(self) -> exp.Expression: 1218 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1219 this = self._parse_bitwise() 1220 1221 if self._match_text_seq("DELETE"): 1222 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1223 if self._match_text_seq("RECOMPRESS"): 1224 return self.expression( 1225 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1226 ) 1227 if self._match_text_seq("TO", "DISK"): 1228 return self.expression( 1229 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1230 ) 1231 if self._match_text_seq("TO", "VOLUME"): 1232 return self.expression( 1233 
exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1234 ) 1235 1236 return this 1237 1238 expressions = self._parse_csv(_parse_ttl_action) 1239 where = self._parse_where() 1240 group = self._parse_group() 1241 1242 aggregates = None 1243 if group and self._match(TokenType.SET): 1244 aggregates = self._parse_csv(self._parse_set_item) 1245 1246 return self.expression( 1247 exp.MergeTreeTTL, 1248 expressions=expressions, 1249 where=where, 1250 group=group, 1251 aggregates=aggregates, 1252 ) 1253 1254 def _parse_statement(self) -> t.Optional[exp.Expression]: 1255 if self._curr is None: 1256 return None 1257 1258 if self._match_set(self.STATEMENT_PARSERS): 1259 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1260 1261 if self._match_set(Tokenizer.COMMANDS): 1262 return self._parse_command() 1263 1264 expression = self._parse_expression() 1265 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1266 return self._parse_query_modifiers(expression) 1267 1268 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1269 start = self._prev 1270 temporary = self._match(TokenType.TEMPORARY) 1271 materialized = self._match_text_seq("MATERIALIZED") 1272 1273 kind = self._match_set(self.CREATABLES) and self._prev.text 1274 if not kind: 1275 return self._parse_as_command(start) 1276 1277 return self.expression( 1278 exp.Drop, 1279 comments=start.comments, 1280 exists=exists or self._parse_exists(), 1281 this=self._parse_table(schema=True), 1282 kind=kind, 1283 temporary=temporary, 1284 materialized=materialized, 1285 cascade=self._match_text_seq("CASCADE"), 1286 constraints=self._match_text_seq("CONSTRAINTS"), 1287 purge=self._match_text_seq("PURGE"), 1288 ) 1289 1290 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1291 return ( 1292 self._match_text_seq("IF") 1293 and (not not_ or self._match(TokenType.NOT)) 1294 and self._match(TokenType.EXISTS) 1295 ) 1296 1297 def _parse_create(self) -> exp.Create | exp.Command: 1298 # Note: this can't be None because we've matched a statement parser 1299 start = self._prev 1300 comments = self._prev_comments 1301 1302 replace = start.text.upper() == "REPLACE" or self._match_pair( 1303 TokenType.OR, TokenType.REPLACE 1304 ) 1305 unique = self._match(TokenType.UNIQUE) 1306 1307 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1308 self._advance() 1309 1310 properties = None 1311 create_token = self._match_set(self.CREATABLES) and self._prev 1312 1313 if not create_token: 1314 # exp.Properties.Location.POST_CREATE 1315 properties = self._parse_properties() 1316 create_token = self._match_set(self.CREATABLES) and self._prev 1317 1318 if not properties or not create_token: 1319 return self._parse_as_command(start) 1320 1321 exists = self._parse_exists(not_=True) 1322 this = None 1323 expression: t.Optional[exp.Expression] = None 1324 indexes = None 1325 no_schema_binding = None 1326 begin = None 1327 end = None 1328 clone = None 1329 1330 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1331 nonlocal properties 1332 if properties and temp_props: 1333 properties.expressions.extend(temp_props.expressions) 1334 elif temp_props: 1335 properties = temp_props 1336 1337 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1338 this = self._parse_user_defined_function(kind=create_token.token_type) 1339 1340 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1341 
extend_props(self._parse_properties()) 1342 1343 self._match(TokenType.ALIAS) 1344 1345 if self._match(TokenType.COMMAND): 1346 expression = self._parse_as_command(self._prev) 1347 else: 1348 begin = self._match(TokenType.BEGIN) 1349 return_ = self._match_text_seq("RETURN") 1350 1351 if self._match(TokenType.STRING, advance=False): 1352 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1353 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1354 expression = self._parse_string() 1355 extend_props(self._parse_properties()) 1356 else: 1357 expression = self._parse_statement() 1358 1359 end = self._match_text_seq("END") 1360 1361 if return_: 1362 expression = self.expression(exp.Return, this=expression) 1363 elif create_token.token_type == TokenType.INDEX: 1364 this = self._parse_index(index=self._parse_id_var()) 1365 elif create_token.token_type in self.DB_CREATABLES: 1366 table_parts = self._parse_table_parts(schema=True) 1367 1368 # exp.Properties.Location.POST_NAME 1369 self._match(TokenType.COMMA) 1370 extend_props(self._parse_properties(before=True)) 1371 1372 this = self._parse_schema(this=table_parts) 1373 1374 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1375 extend_props(self._parse_properties()) 1376 1377 self._match(TokenType.ALIAS) 1378 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1379 # exp.Properties.Location.POST_ALIAS 1380 extend_props(self._parse_properties()) 1381 1382 expression = self._parse_ddl_select() 1383 1384 if create_token.token_type == TokenType.TABLE: 1385 # exp.Properties.Location.POST_EXPRESSION 1386 extend_props(self._parse_properties()) 1387 1388 indexes = [] 1389 while True: 1390 index = self._parse_index() 1391 1392 # exp.Properties.Location.POST_INDEX 1393 extend_props(self._parse_properties()) 1394 1395 if not index: 1396 break 1397 else: 1398 self._match(TokenType.COMMA) 1399 indexes.append(index) 1400 elif create_token.token_type == TokenType.VIEW: 1401 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1402 no_schema_binding = True 1403 1404 shallow = self._match_text_seq("SHALLOW") 1405 1406 if self._match_texts(self.CLONE_KEYWORDS): 1407 copy = self._prev.text.lower() == "copy" 1408 clone = self._parse_table(schema=True) 1409 when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper() 1410 clone_kind = ( 1411 self._match(TokenType.L_PAREN) 1412 and self._match_texts(self.CLONE_KINDS) 1413 and self._prev.text.upper() 1414 ) 1415 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1416 self._match(TokenType.R_PAREN) 1417 clone = self.expression( 1418 exp.Clone, 1419 this=clone, 1420 when=when, 1421 kind=clone_kind, 1422 shallow=shallow, 1423 expression=clone_expression, 1424 copy=copy, 1425 ) 1426 1427 return self.expression( 1428 exp.Create, 1429 comments=comments, 1430 this=this, 1431 kind=create_token.text, 1432 replace=replace, 1433 unique=unique, 1434 expression=expression, 1435 exists=exists, 1436 properties=properties, 1437 indexes=indexes, 1438 no_schema_binding=no_schema_binding, 1439 begin=begin, 1440 end=end, 1441 clone=clone, 1442 ) 1443 1444 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1445 # only used for teradata currently 1446 self._match(TokenType.COMMA) 1447 1448 kwargs = { 1449 "no": self._match_text_seq("NO"), 1450 "dual": self._match_text_seq("DUAL"), 1451 "before": self._match_text_seq("BEFORE"), 1452 "default": self._match_text_seq("DEFAULT"), 
1453 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1454 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1455 "after": self._match_text_seq("AFTER"), 1456 "minimum": self._match_texts(("MIN", "MINIMUM")), 1457 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1458 } 1459 1460 if self._match_texts(self.PROPERTY_PARSERS): 1461 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1462 try: 1463 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1464 except TypeError: 1465 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1466 1467 return None 1468 1469 def _parse_property(self) -> t.Optional[exp.Expression]: 1470 if self._match_texts(self.PROPERTY_PARSERS): 1471 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1472 1473 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1474 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1475 1476 if self._match_text_seq("COMPOUND", "SORTKEY"): 1477 return self._parse_sortkey(compound=True) 1478 1479 if self._match_text_seq("SQL", "SECURITY"): 1480 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1481 1482 index = self._index 1483 key = self._parse_column() 1484 1485 if not self._match(TokenType.EQ): 1486 self._retreat(index) 1487 return None 1488 1489 return self.expression( 1490 exp.Property, 1491 this=key.to_dot() if isinstance(key, exp.Column) else key, 1492 value=self._parse_column() or self._parse_var(any_token=True), 1493 ) 1494 1495 def _parse_stored(self) -> exp.FileFormatProperty: 1496 self._match(TokenType.ALIAS) 1497 1498 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1499 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1500 1501 return self.expression( 1502 exp.FileFormatProperty, 1503 this=self.expression( 1504 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1505 ) 1506 if input_format or output_format 1507 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1508 ) 1509 1510 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1511 self._match(TokenType.EQ) 1512 self._match(TokenType.ALIAS) 1513 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1514 1515 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1516 properties = [] 1517 while True: 1518 if before: 1519 prop = self._parse_property_before() 1520 else: 1521 prop = self._parse_property() 1522 1523 if not prop: 1524 break 1525 for p in ensure_list(prop): 1526 properties.append(p) 1527 1528 if properties: 1529 return self.expression(exp.Properties, expressions=properties) 1530 1531 return None 1532 1533 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1534 return self.expression( 1535 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1536 ) 1537 1538 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1539 if self._index >= 2: 1540 pre_volatile_token = self._tokens[self._index - 2] 1541 else: 1542 pre_volatile_token = None 1543 1544 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1545 return exp.VolatileProperty() 1546 1547 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1548 1549 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1550 
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty,
            this=self._parse_number(),
            percent=self._match(TokenType.PERCENT),
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

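            # Annotation (not part of the original source): each INCLUDING/EXCLUDING
            # keyword is followed by a single option name, as in the Postgres form
            #   CREATE TABLE t2 (LIKE t1 INCLUDING DEFAULTS EXCLUDING CONSTRAINTS)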
            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
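        # Annotation (not part of the original source): both the Postgres form
        #   INSERT ... ON CONFLICT (key) DO UPDATE SET x = 1
        # and the MySQL form
        #   INSERT ... ON DUPLICATE KEY UPDATE x = 1
        # funnel through here; the `duplicate` flag distinguishes the two.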
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
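
        # Annotation (not part of the original source): anything other than Hive's
        # LOAD DATA [LOCAL] INPATH ... [OVERWRITE] INTO TABLE ... is kept verbatim
        # as an exp.Command via the fallback below.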
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

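        # Annotation (not part of the original source): this handles the clause form
        #   MATCH_RECOGNIZE (PARTITION BY ... ORDER BY ... [MEASURES ...]
        #                    [rows-per-match] [AFTER MATCH SKIP ...]
        #                    [PATTERN (...)] [DEFINE ...])
        # The PATTERN body is captured verbatim below by balancing parentheses.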
        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

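        # Annotation (not part of the original source): from here on, both call
        # modes (a standalone CREATE INDEX statement and an index definition nested
        # inside CREATE TABLE) share the USING / column-list / PARTITION BY / WHERE
        # handling below.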
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema) or ""

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()
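        # Annotation (not part of the original source): `version` covers time-travel
        # suffixes such as FOR SYSTEM_TIME AS OF ... or FOR VERSION AS OF ...,
        # which _parse_version below turns into an exp.Version node.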

        if version:
            this.set("version", version)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
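        # Annotation (not part of the original source): a grouping set is either a
        # parenthesized column tuple or a bare column, e.g.
        #   GROUP BY GROUPING SETS ((a, b), a)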
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            comments=self._prev.comments,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
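                    # Annotation (not part of the original source): e.g. in
                    # INTERVAL '1 day' PRECEDING, the trailing word belongs to the
                    # window frame, not to the interval unit, so it is put back.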
a "window side") 3379 unit = None 3380 self._retreat(self._index - 1) 3381 3382 this = exp.Literal.string(parts[0]) 3383 unit = self.expression(exp.Var, this=parts[1]) 3384 3385 return self.expression(exp.Interval, this=this, unit=unit) 3386 3387 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3388 this = self._parse_term() 3389 3390 while True: 3391 if self._match_set(self.BITWISE): 3392 this = self.expression( 3393 self.BITWISE[self._prev.token_type], 3394 this=this, 3395 expression=self._parse_term(), 3396 ) 3397 elif self._match(TokenType.DQMARK): 3398 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3399 elif self._match_pair(TokenType.LT, TokenType.LT): 3400 this = self.expression( 3401 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3402 ) 3403 elif self._match_pair(TokenType.GT, TokenType.GT): 3404 this = self.expression( 3405 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3406 ) 3407 else: 3408 break 3409 3410 return this 3411 3412 def _parse_term(self) -> t.Optional[exp.Expression]: 3413 return self._parse_tokens(self._parse_factor, self.TERM) 3414 3415 def _parse_factor(self) -> t.Optional[exp.Expression]: 3416 if self.EXPONENT: 3417 factor = self._parse_tokens(self._parse_exponent, self.FACTOR) 3418 else: 3419 factor = self._parse_tokens(self._parse_unary, self.FACTOR) 3420 if isinstance(factor, exp.Div): 3421 factor.args["typed"] = self.TYPED_DIVISION 3422 factor.args["safe"] = self.SAFE_DIVISION 3423 return factor 3424 3425 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3426 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3427 3428 def _parse_unary(self) -> t.Optional[exp.Expression]: 3429 if self._match_set(self.UNARY_PARSERS): 3430 return self.UNARY_PARSERS[self._prev.token_type](self) 3431 return self._parse_at_time_zone(self._parse_type()) 3432 3433 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3434 interval = parse_interval and self._parse_interval() 3435 if interval: 3436 return interval 3437 3438 index = self._index 3439 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3440 this = self._parse_column() 3441 3442 if data_type: 3443 if isinstance(this, exp.Literal): 3444 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3445 if parser: 3446 return parser(self, this, data_type) 3447 return self.expression(exp.Cast, this=this, to=data_type) 3448 if not data_type.expressions: 3449 self._retreat(index) 3450 return self._parse_column() 3451 return self._parse_column_ops(data_type) 3452 3453 return this and self._parse_column_ops(this) 3454 3455 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3456 this = self._parse_type() 3457 if not this: 3458 return None 3459 3460 return self.expression( 3461 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3462 ) 3463 3464 def _parse_types( 3465 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3466 ) -> t.Optional[exp.Expression]: 3467 index = self._index 3468 3469 prefix = self._match_text_seq("SYSUDTLIB", ".") 3470 3471 if not self._match_set(self.TYPE_TOKENS): 3472 identifier = allow_identifiers and self._parse_id_var( 3473 any_token=False, tokens=(TokenType.VAR,) 3474 ) 3475 3476 if identifier: 3477 tokens = self._tokenizer.tokenize(identifier.name) 3478 3479 if len(tokens) != 1: 3480 self.raise_error("Unexpected identifier", self._prev) 3481 3482 if tokens[0].token_type in self.TYPE_TOKENS: 3483 self._prev = 
tokens[0] 3484 elif self.SUPPORTS_USER_DEFINED_TYPES: 3485 type_name = identifier.name 3486 3487 while self._match(TokenType.DOT): 3488 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3489 3490 return exp.DataType.build(type_name, udt=True) 3491 else: 3492 return None 3493 else: 3494 return None 3495 3496 type_token = self._prev.token_type 3497 3498 if type_token == TokenType.PSEUDO_TYPE: 3499 return self.expression(exp.PseudoType, this=self._prev.text) 3500 3501 if type_token == TokenType.OBJECT_IDENTIFIER: 3502 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3503 3504 nested = type_token in self.NESTED_TYPE_TOKENS 3505 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3506 expressions = None 3507 maybe_func = False 3508 3509 if self._match(TokenType.L_PAREN): 3510 if is_struct: 3511 expressions = self._parse_csv(self._parse_struct_types) 3512 elif nested: 3513 expressions = self._parse_csv( 3514 lambda: self._parse_types( 3515 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3516 ) 3517 ) 3518 elif type_token in self.ENUM_TYPE_TOKENS: 3519 expressions = self._parse_csv(self._parse_equality) 3520 else: 3521 expressions = self._parse_csv(self._parse_type_size) 3522 3523 if not expressions or not self._match(TokenType.R_PAREN): 3524 self._retreat(index) 3525 return None 3526 3527 maybe_func = True 3528 3529 this: t.Optional[exp.Expression] = None 3530 values: t.Optional[t.List[exp.Expression]] = None 3531 3532 if nested and self._match(TokenType.LT): 3533 if is_struct: 3534 expressions = self._parse_csv(self._parse_struct_types) 3535 else: 3536 expressions = self._parse_csv( 3537 lambda: self._parse_types( 3538 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3539 ) 3540 ) 3541 3542 if not self._match(TokenType.GT): 3543 self.raise_error("Expecting >") 3544 3545 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3546 values = self._parse_csv(self._parse_conjunction) 3547 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3548 3549 if type_token in self.TIMESTAMPS: 3550 if self._match_text_seq("WITH", "TIME", "ZONE"): 3551 maybe_func = False 3552 tz_type = ( 3553 exp.DataType.Type.TIMETZ 3554 if type_token in self.TIMES 3555 else exp.DataType.Type.TIMESTAMPTZ 3556 ) 3557 this = exp.DataType(this=tz_type, expressions=expressions) 3558 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3559 maybe_func = False 3560 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3561 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3562 maybe_func = False 3563 elif type_token == TokenType.INTERVAL: 3564 unit = self._parse_var() 3565 3566 if self._match_text_seq("TO"): 3567 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3568 else: 3569 span = None 3570 3571 if span or not unit: 3572 this = self.expression( 3573 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3574 ) 3575 else: 3576 this = self.expression(exp.Interval, unit=unit) 3577 3578 if maybe_func and check_func: 3579 index2 = self._index 3580 peek = self._parse_string() 3581 3582 if not peek: 3583 self._retreat(index) 3584 return None 3585 3586 self._retreat(index2) 3587 3588 if not this: 3589 if self._match_text_seq("UNSIGNED"): 3590 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3591 if not unsigned_type_token: 3592 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3593 3594 type_token = unsigned_type_token or type_token 
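# --- Editor's note: illustrative sketch, not part of the upstream source. ---
# Two effects of the type-parsing tail here: "UNSIGNED" remaps the signed token
# via SIGNED_TO_UNSIGNED_TYPE_TOKEN, and each trailing "[]" pair wraps the type
# in an ARRAY node. Roughly, assuming the public API:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sqlglot.parse_one("CREATE TABLE x (c INT UNSIGNED)", read="mysql").find(exp.DataType).sql()
#     'UINT'
#     >>> sqlglot.parse_one("CREATE TABLE x (c INT[])", read="postgres").sql(dialect="postgres")
#     'CREATE TABLE x (c INT[])'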
3595 3596 this = exp.DataType( 3597 this=exp.DataType.Type[type_token.value], 3598 expressions=expressions, 3599 nested=nested, 3600 values=values, 3601 prefix=prefix, 3602 ) 3603 3604 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3605 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3606 3607 return this 3608 3609 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3610 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3611 self._match(TokenType.COLON) 3612 return self._parse_column_def(this) 3613 3614 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3615 if not self._match_text_seq("AT", "TIME", "ZONE"): 3616 return this 3617 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3618 3619 def _parse_column(self) -> t.Optional[exp.Expression]: 3620 this = self._parse_field() 3621 if isinstance(this, exp.Identifier): 3622 this = self.expression(exp.Column, this=this) 3623 elif not this: 3624 return self._parse_bracket(this) 3625 return self._parse_column_ops(this) 3626 3627 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3628 this = self._parse_bracket(this) 3629 3630 while self._match_set(self.COLUMN_OPERATORS): 3631 op_token = self._prev.token_type 3632 op = self.COLUMN_OPERATORS.get(op_token) 3633 3634 if op_token == TokenType.DCOLON: 3635 field = self._parse_types() 3636 if not field: 3637 self.raise_error("Expected type") 3638 elif op and self._curr: 3639 self._advance() 3640 value = self._prev.text 3641 field = ( 3642 exp.Literal.number(value) 3643 if self._prev.token_type == TokenType.NUMBER 3644 else exp.Literal.string(value) 3645 ) 3646 else: 3647 field = self._parse_field(anonymous_func=True, any_token=True) 3648 3649 if isinstance(field, exp.Func): 3650 # bigquery allows function calls like x.y.count(...) 3651 # SAFE.SUBSTR(...) 
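# --- Editor's note: illustrative sketch, not part of the upstream source. ---
# COLUMN_OPERATORS also drives suffix operators such as the Postgres-style
# "::" cast handled via TokenType.DCOLON above; the default generator then
# writes it back as CAST, e.g. (assuming the public transpile API):
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT c::INT FROM t", read="postgres")[0]
#     'SELECT CAST(c AS INT) FROM t'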
3652 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3653 this = self._replace_columns_with_dots(this) 3654 3655 if op: 3656 this = op(self, this, field) 3657 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3658 this = self.expression( 3659 exp.Column, 3660 this=field, 3661 table=this.this, 3662 db=this.args.get("table"), 3663 catalog=this.args.get("db"), 3664 ) 3665 else: 3666 this = self.expression(exp.Dot, this=this, expression=field) 3667 this = self._parse_bracket(this) 3668 return this 3669 3670 def _parse_primary(self) -> t.Optional[exp.Expression]: 3671 if self._match_set(self.PRIMARY_PARSERS): 3672 token_type = self._prev.token_type 3673 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3674 3675 if token_type == TokenType.STRING: 3676 expressions = [primary] 3677 while self._match(TokenType.STRING): 3678 expressions.append(exp.Literal.string(self._prev.text)) 3679 3680 if len(expressions) > 1: 3681 return self.expression(exp.Concat, expressions=expressions) 3682 3683 return primary 3684 3685 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3686 return exp.Literal.number(f"0.{self._prev.text}") 3687 3688 if self._match(TokenType.L_PAREN): 3689 comments = self._prev_comments 3690 query = self._parse_select() 3691 3692 if query: 3693 expressions = [query] 3694 else: 3695 expressions = self._parse_expressions() 3696 3697 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3698 3699 if isinstance(this, exp.Subqueryable): 3700 this = self._parse_set_operations( 3701 self._parse_subquery(this=this, parse_alias=False) 3702 ) 3703 elif len(expressions) > 1: 3704 this = self.expression(exp.Tuple, expressions=expressions) 3705 else: 3706 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3707 3708 if this: 3709 this.add_comments(comments) 3710 3711 self._match_r_paren(expression=this) 3712 return this 3713 3714 return None 3715 3716 def _parse_field( 3717 self, 3718 any_token: bool = False, 3719 tokens: t.Optional[t.Collection[TokenType]] = None, 3720 anonymous_func: bool = False, 3721 ) -> t.Optional[exp.Expression]: 3722 return ( 3723 self._parse_primary() 3724 or self._parse_function(anonymous=anonymous_func) 3725 or self._parse_id_var(any_token=any_token, tokens=tokens) 3726 ) 3727 3728 def _parse_function( 3729 self, 3730 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3731 anonymous: bool = False, 3732 optional_parens: bool = True, 3733 ) -> t.Optional[exp.Expression]: 3734 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3735 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3736 fn_syntax = False 3737 if ( 3738 self._match(TokenType.L_BRACE, advance=False) 3739 and self._next 3740 and self._next.text.upper() == "FN" 3741 ): 3742 self._advance(2) 3743 fn_syntax = True 3744 3745 func = self._parse_function_call( 3746 functions=functions, anonymous=anonymous, optional_parens=optional_parens 3747 ) 3748 3749 if fn_syntax: 3750 self._match(TokenType.R_BRACE) 3751 3752 return func 3753 3754 def _parse_function_call( 3755 self, 3756 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3757 anonymous: bool = False, 3758 optional_parens: bool = True, 3759 ) -> t.Optional[exp.Expression]: 3760 if not self._curr: 3761 return None 3762 3763 token_type = self._curr.token_type 3764 this = self._curr.text 3765 upper = this.upper() 3766 3767 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3768 if 
optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3769 self._advance() 3770 return parser(self) 3771 3772 if not self._next or self._next.token_type != TokenType.L_PAREN: 3773 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3774 self._advance() 3775 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3776 3777 return None 3778 3779 if token_type not in self.FUNC_TOKENS: 3780 return None 3781 3782 self._advance(2) 3783 3784 parser = self.FUNCTION_PARSERS.get(upper) 3785 if parser and not anonymous: 3786 this = parser(self) 3787 else: 3788 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3789 3790 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3791 this = self.expression(subquery_predicate, this=self._parse_select()) 3792 self._match_r_paren() 3793 return this 3794 3795 if functions is None: 3796 functions = self.FUNCTIONS 3797 3798 function = functions.get(upper) 3799 3800 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3801 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3802 3803 if function and not anonymous: 3804 func = self.validate_expression(function(args), args) 3805 if not self.NORMALIZE_FUNCTIONS: 3806 func.meta["name"] = this 3807 this = func 3808 else: 3809 this = self.expression(exp.Anonymous, this=this, expressions=args) 3810 3811 self._match_r_paren(this) 3812 return self._parse_window(this) 3813 3814 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3815 return self._parse_column_def(self._parse_id_var()) 3816 3817 def _parse_user_defined_function( 3818 self, kind: t.Optional[TokenType] = None 3819 ) -> t.Optional[exp.Expression]: 3820 this = self._parse_id_var() 3821 3822 while self._match(TokenType.DOT): 3823 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3824 3825 if not self._match(TokenType.L_PAREN): 3826 return this 3827 3828 expressions = self._parse_csv(self._parse_function_parameter) 3829 self._match_r_paren() 3830 return self.expression( 3831 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3832 ) 3833 3834 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3835 literal = self._parse_primary() 3836 if literal: 3837 return self.expression(exp.Introducer, this=token.text, expression=literal) 3838 3839 return self.expression(exp.Identifier, this=token.text) 3840 3841 def _parse_session_parameter(self) -> exp.SessionParameter: 3842 kind = None 3843 this = self._parse_id_var() or self._parse_primary() 3844 3845 if this and self._match(TokenType.DOT): 3846 kind = this.name 3847 this = self._parse_var() or self._parse_primary() 3848 3849 return self.expression(exp.SessionParameter, this=this, kind=kind) 3850 3851 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3852 index = self._index 3853 3854 if self._match(TokenType.L_PAREN): 3855 expressions = t.cast( 3856 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3857 ) 3858 3859 if not self._match(TokenType.R_PAREN): 3860 self._retreat(index) 3861 else: 3862 expressions = [self._parse_id_var()] 3863 3864 if self._match_set(self.LAMBDAS): 3865 return self.LAMBDAS[self._prev.token_type](self, expressions) 3866 3867 self._retreat(index) 3868 3869 this: t.Optional[exp.Expression] 3870 3871 if self._match(TokenType.DISTINCT): 3872 this = self.expression( 3873 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3874 ) 3875 else: 3876 this = 
self._parse_select_or_expression(alias=alias) 3877 3878 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3879 3880 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3881 index = self._index 3882 3883 if not self.errors: 3884 try: 3885 if self._parse_select(nested=True): 3886 return this 3887 except ParseError: 3888 pass 3889 finally: 3890 self.errors.clear() 3891 self._retreat(index) 3892 3893 if not self._match(TokenType.L_PAREN): 3894 return this 3895 3896 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3897 3898 self._match_r_paren() 3899 return self.expression(exp.Schema, this=this, expressions=args) 3900 3901 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3902 return self._parse_column_def(self._parse_field(any_token=True)) 3903 3904 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3905 # column defs are not really columns, they're identifiers 3906 if isinstance(this, exp.Column): 3907 this = this.this 3908 3909 kind = self._parse_types(schema=True) 3910 3911 if self._match_text_seq("FOR", "ORDINALITY"): 3912 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3913 3914 constraints: t.List[exp.Expression] = [] 3915 3916 if not kind and self._match(TokenType.ALIAS): 3917 constraints.append( 3918 self.expression( 3919 exp.ComputedColumnConstraint, 3920 this=self._parse_conjunction(), 3921 persisted=self._match_text_seq("PERSISTED"), 3922 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3923 ) 3924 ) 3925 3926 while True: 3927 constraint = self._parse_column_constraint() 3928 if not constraint: 3929 break 3930 constraints.append(constraint) 3931 3932 if not kind and not constraints: 3933 return this 3934 3935 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3936 3937 def _parse_auto_increment( 3938 self, 3939 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3940 start = None 3941 increment = None 3942 3943 if self._match(TokenType.L_PAREN, advance=False): 3944 args = self._parse_wrapped_csv(self._parse_bitwise) 3945 start = seq_get(args, 0) 3946 increment = seq_get(args, 1) 3947 elif self._match_text_seq("START"): 3948 start = self._parse_bitwise() 3949 self._match_text_seq("INCREMENT") 3950 increment = self._parse_bitwise() 3951 3952 if start and increment: 3953 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3954 3955 return exp.AutoIncrementColumnConstraint() 3956 3957 def _parse_compress(self) -> exp.CompressColumnConstraint: 3958 if self._match(TokenType.L_PAREN, advance=False): 3959 return self.expression( 3960 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3961 ) 3962 3963 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3964 3965 def _parse_generated_as_identity( 3966 self, 3967 ) -> ( 3968 exp.GeneratedAsIdentityColumnConstraint 3969 | exp.ComputedColumnConstraint 3970 | exp.GeneratedAsRowColumnConstraint 3971 ): 3972 if self._match_text_seq("BY", "DEFAULT"): 3973 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3974 this = self.expression( 3975 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3976 ) 3977 else: 3978 self._match_text_seq("ALWAYS") 3979 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3980 3981 self._match(TokenType.ALIAS) 3982 3983 if 
self._match_text_seq("ROW"): 3984 start = self._match_text_seq("START") 3985 if not start: 3986 self._match(TokenType.END) 3987 hidden = self._match_text_seq("HIDDEN") 3988 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 3989 3990 identity = self._match_text_seq("IDENTITY") 3991 3992 if self._match(TokenType.L_PAREN): 3993 if self._match(TokenType.START_WITH): 3994 this.set("start", self._parse_bitwise()) 3995 if self._match_text_seq("INCREMENT", "BY"): 3996 this.set("increment", self._parse_bitwise()) 3997 if self._match_text_seq("MINVALUE"): 3998 this.set("minvalue", self._parse_bitwise()) 3999 if self._match_text_seq("MAXVALUE"): 4000 this.set("maxvalue", self._parse_bitwise()) 4001 4002 if self._match_text_seq("CYCLE"): 4003 this.set("cycle", True) 4004 elif self._match_text_seq("NO", "CYCLE"): 4005 this.set("cycle", False) 4006 4007 if not identity: 4008 this.set("expression", self._parse_bitwise()) 4009 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4010 args = self._parse_csv(self._parse_bitwise) 4011 this.set("start", seq_get(args, 0)) 4012 this.set("increment", seq_get(args, 1)) 4013 4014 self._match_r_paren() 4015 4016 return this 4017 4018 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4019 self._match_text_seq("LENGTH") 4020 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4021 4022 def _parse_not_constraint( 4023 self, 4024 ) -> t.Optional[exp.Expression]: 4025 if self._match_text_seq("NULL"): 4026 return self.expression(exp.NotNullColumnConstraint) 4027 if self._match_text_seq("CASESPECIFIC"): 4028 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4029 if self._match_text_seq("FOR", "REPLICATION"): 4030 return self.expression(exp.NotForReplicationColumnConstraint) 4031 return None 4032 4033 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4034 if self._match(TokenType.CONSTRAINT): 4035 this = self._parse_id_var() 4036 else: 4037 this = None 4038 4039 if self._match_texts(self.CONSTRAINT_PARSERS): 4040 return self.expression( 4041 exp.ColumnConstraint, 4042 this=this, 4043 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4044 ) 4045 4046 return this 4047 4048 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4049 if not self._match(TokenType.CONSTRAINT): 4050 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4051 4052 this = self._parse_id_var() 4053 expressions = [] 4054 4055 while True: 4056 constraint = self._parse_unnamed_constraint() or self._parse_function() 4057 if not constraint: 4058 break 4059 expressions.append(constraint) 4060 4061 return self.expression(exp.Constraint, this=this, expressions=expressions) 4062 4063 def _parse_unnamed_constraint( 4064 self, constraints: t.Optional[t.Collection[str]] = None 4065 ) -> t.Optional[exp.Expression]: 4066 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4067 constraints or self.CONSTRAINT_PARSERS 4068 ): 4069 return None 4070 4071 constraint = self._prev.text.upper() 4072 if constraint not in self.CONSTRAINT_PARSERS: 4073 self.raise_error(f"No parser found for schema constraint {constraint}.") 4074 4075 return self.CONSTRAINT_PARSERS[constraint](self) 4076 4077 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4078 self._match_text_seq("KEY") 4079 return self.expression( 4080 exp.UniqueColumnConstraint, 4081 this=self._parse_schema(self._parse_id_var(any_token=False)), 4082 
index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4083 ) 4084 4085 def _parse_key_constraint_options(self) -> t.List[str]: 4086 options = [] 4087 while True: 4088 if not self._curr: 4089 break 4090 4091 if self._match(TokenType.ON): 4092 action = None 4093 on = self._advance_any() and self._prev.text 4094 4095 if self._match_text_seq("NO", "ACTION"): 4096 action = "NO ACTION" 4097 elif self._match_text_seq("CASCADE"): 4098 action = "CASCADE" 4099 elif self._match_text_seq("RESTRICT"): 4100 action = "RESTRICT" 4101 elif self._match_pair(TokenType.SET, TokenType.NULL): 4102 action = "SET NULL" 4103 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4104 action = "SET DEFAULT" 4105 else: 4106 self.raise_error("Invalid key constraint") 4107 4108 options.append(f"ON {on} {action}") 4109 elif self._match_text_seq("NOT", "ENFORCED"): 4110 options.append("NOT ENFORCED") 4111 elif self._match_text_seq("DEFERRABLE"): 4112 options.append("DEFERRABLE") 4113 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4114 options.append("INITIALLY DEFERRED") 4115 elif self._match_text_seq("NORELY"): 4116 options.append("NORELY") 4117 elif self._match_text_seq("MATCH", "FULL"): 4118 options.append("MATCH FULL") 4119 else: 4120 break 4121 4122 return options 4123 4124 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4125 if match and not self._match(TokenType.REFERENCES): 4126 return None 4127 4128 expressions = None 4129 this = self._parse_table(schema=True) 4130 options = self._parse_key_constraint_options() 4131 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4132 4133 def _parse_foreign_key(self) -> exp.ForeignKey: 4134 expressions = self._parse_wrapped_id_vars() 4135 reference = self._parse_references() 4136 options = {} 4137 4138 while self._match(TokenType.ON): 4139 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4140 self.raise_error("Expected DELETE or UPDATE") 4141 4142 kind = self._prev.text.lower() 4143 4144 if self._match_text_seq("NO", "ACTION"): 4145 action = "NO ACTION" 4146 elif self._match(TokenType.SET): 4147 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4148 action = "SET " + self._prev.text.upper() 4149 else: 4150 self._advance() 4151 action = self._prev.text.upper() 4152 4153 options[kind] = action 4154 4155 return self.expression( 4156 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4157 ) 4158 4159 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4160 return self._parse_field() 4161 4162 def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint: 4163 self._match(TokenType.TIMESTAMP_SNAPSHOT) 4164 4165 id_vars = self._parse_wrapped_id_vars() 4166 return self.expression( 4167 exp.PeriodForSystemTimeConstraint, 4168 this=seq_get(id_vars, 0), 4169 expression=seq_get(id_vars, 1), 4170 ) 4171 4172 def _parse_primary_key( 4173 self, wrapped_optional: bool = False, in_props: bool = False 4174 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4175 desc = ( 4176 self._match_set((TokenType.ASC, TokenType.DESC)) 4177 and self._prev.token_type == TokenType.DESC 4178 ) 4179 4180 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4181 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4182 4183 expressions = self._parse_wrapped_csv( 4184 self._parse_primary_key_part, optional=wrapped_optional 4185 ) 4186 options = self._parse_key_constraint_options() 4187 
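# --- Editor's note: illustrative sketch, not part of the upstream source. ---
# A wrapped, schema-level PRIMARY KEY (...), together with any options that
# _parse_key_constraint_options collected, becomes an exp.PrimaryKey:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("CREATE TABLE t (a INT, PRIMARY KEY (a))").sql()
#     'CREATE TABLE t (a INT, PRIMARY KEY (a))'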
return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4188 4189 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4190 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4191 return this 4192 4193 bracket_kind = self._prev.token_type 4194 4195 if self._match(TokenType.COLON): 4196 expressions: t.List[exp.Expression] = [ 4197 self.expression(exp.Slice, expression=self._parse_conjunction()) 4198 ] 4199 else: 4200 expressions = self._parse_csv( 4201 lambda: self._parse_slice( 4202 self._parse_alias(self._parse_conjunction(), explicit=True) 4203 ) 4204 ) 4205 4206 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4207 self.raise_error("Expected ]") 4208 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4209 self.raise_error("Expected }") 4210 4211 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4212 if bracket_kind == TokenType.L_BRACE: 4213 this = self.expression(exp.Struct, expressions=expressions) 4214 elif not this or this.name.upper() == "ARRAY": 4215 this = self.expression(exp.Array, expressions=expressions) 4216 else: 4217 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4218 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4219 4220 self._add_comments(this) 4221 return self._parse_bracket(this) 4222 4223 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4224 if self._match(TokenType.COLON): 4225 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4226 return this 4227 4228 def _parse_case(self) -> t.Optional[exp.Expression]: 4229 ifs = [] 4230 default = None 4231 4232 comments = self._prev_comments 4233 expression = self._parse_conjunction() 4234 4235 while self._match(TokenType.WHEN): 4236 this = self._parse_conjunction() 4237 self._match(TokenType.THEN) 4238 then = self._parse_conjunction() 4239 ifs.append(self.expression(exp.If, this=this, true=then)) 4240 4241 if self._match(TokenType.ELSE): 4242 default = self._parse_conjunction() 4243 4244 if not self._match(TokenType.END): 4245 self.raise_error("Expected END after CASE", self._prev) 4246 4247 return self._parse_window( 4248 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4249 ) 4250 4251 def _parse_if(self) -> t.Optional[exp.Expression]: 4252 if self._match(TokenType.L_PAREN): 4253 args = self._parse_csv(self._parse_conjunction) 4254 this = self.validate_expression(exp.If.from_arg_list(args), args) 4255 self._match_r_paren() 4256 else: 4257 index = self._index - 1 4258 condition = self._parse_conjunction() 4259 4260 if not condition: 4261 self._retreat(index) 4262 return None 4263 4264 self._match(TokenType.THEN) 4265 true = self._parse_conjunction() 4266 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4267 self._match(TokenType.END) 4268 this = self.expression(exp.If, this=condition, true=true, false=false) 4269 4270 return self._parse_window(this) 4271 4272 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4273 if not self._match_text_seq("VALUE", "FOR"): 4274 self._retreat(self._index - 1) 4275 return None 4276 4277 return self.expression( 4278 exp.NextValueFor, 4279 this=self._parse_column(), 4280 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4281 ) 4282 4283 def _parse_extract(self) -> exp.Extract: 4284 this = self._parse_function() or 
self._parse_var() or self._parse_type() 4285 4286 if self._match(TokenType.FROM): 4287 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4288 4289 if not self._match(TokenType.COMMA): 4290 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4291 4292 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4293 4294 def _parse_any_value(self) -> exp.AnyValue: 4295 this = self._parse_lambda() 4296 is_max = None 4297 having = None 4298 4299 if self._match(TokenType.HAVING): 4300 self._match_texts(("MAX", "MIN")) 4301 is_max = self._prev.text == "MAX" 4302 having = self._parse_column() 4303 4304 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4305 4306 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4307 this = self._parse_conjunction() 4308 4309 if not self._match(TokenType.ALIAS): 4310 if self._match(TokenType.COMMA): 4311 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4312 4313 self.raise_error("Expected AS after CAST") 4314 4315 fmt = None 4316 to = self._parse_types() 4317 4318 if self._match(TokenType.FORMAT): 4319 fmt_string = self._parse_string() 4320 fmt = self._parse_at_time_zone(fmt_string) 4321 4322 if not to: 4323 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4324 if to.this in exp.DataType.TEMPORAL_TYPES: 4325 this = self.expression( 4326 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4327 this=this, 4328 format=exp.Literal.string( 4329 format_time( 4330 fmt_string.this if fmt_string else "", 4331 self.FORMAT_MAPPING or self.TIME_MAPPING, 4332 self.FORMAT_TRIE or self.TIME_TRIE, 4333 ) 4334 ), 4335 ) 4336 4337 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4338 this.set("zone", fmt.args["zone"]) 4339 return this 4340 elif not to: 4341 self.raise_error("Expected TYPE after CAST") 4342 elif isinstance(to, exp.Identifier): 4343 to = exp.DataType.build(to.name, udt=True) 4344 elif to.this == exp.DataType.Type.CHAR: 4345 if self._match(TokenType.CHARACTER_SET): 4346 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4347 4348 return self.expression( 4349 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4350 ) 4351 4352 def _parse_concat(self) -> t.Optional[exp.Expression]: 4353 args = self._parse_csv(self._parse_conjunction) 4354 if self.CONCAT_NULL_OUTPUTS_STRING: 4355 args = self._ensure_string_if_null(args) 4356 4357 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4358 # we find such a call we replace it with its argument. 
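# --- Editor's note: illustrative sketch, not part of the upstream source. ---
# With the single-argument unwrapping below, CONCAT(x) collapses to just x,
# e.g. (assuming the public transpile API; exact output can vary by dialect):
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT CONCAT(x) FROM t")[0]
#     'SELECT x FROM t'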
4359 if len(args) == 1: 4360 return args[0] 4361 4362 return self.expression( 4363 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4364 ) 4365 4366 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4367 args = self._parse_csv(self._parse_conjunction) 4368 if len(args) < 2: 4369 return self.expression(exp.ConcatWs, expressions=args) 4370 delim, *values = args 4371 if self.CONCAT_NULL_OUTPUTS_STRING: 4372 values = self._ensure_string_if_null(values) 4373 4374 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4375 4376 def _parse_string_agg(self) -> exp.Expression: 4377 if self._match(TokenType.DISTINCT): 4378 args: t.List[t.Optional[exp.Expression]] = [ 4379 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4380 ] 4381 if self._match(TokenType.COMMA): 4382 args.extend(self._parse_csv(self._parse_conjunction)) 4383 else: 4384 args = self._parse_csv(self._parse_conjunction) # type: ignore 4385 4386 index = self._index 4387 if not self._match(TokenType.R_PAREN) and args: 4388 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4389 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4390 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4391 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4392 4393 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4394 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4395 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4396 if not self._match_text_seq("WITHIN", "GROUP"): 4397 self._retreat(index) 4398 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4399 4400 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4401 order = self._parse_order(this=seq_get(args, 0)) 4402 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4403 4404 def _parse_convert( 4405 self, strict: bool, safe: t.Optional[bool] = None 4406 ) -> t.Optional[exp.Expression]: 4407 this = self._parse_bitwise() 4408 4409 if self._match(TokenType.USING): 4410 to: t.Optional[exp.Expression] = self.expression( 4411 exp.CharacterSet, this=self._parse_var() 4412 ) 4413 elif self._match(TokenType.COMMA): 4414 to = self._parse_types() 4415 else: 4416 to = None 4417 4418 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4419 4420 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4421 """ 4422 There are generally two variants of the DECODE function: 4423 4424 - DECODE(bin, charset) 4425 - DECODE(expression, search, result [, search, result] ... [, default]) 4426 4427 The second variant will always be parsed into a CASE expression. Note that NULL 4428 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4429 instead of relying on pattern matching. 
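Example (editor's illustrative addition; assumes the public transpile API):

    >>> import sqlglot
    >>> sqlglot.transpile("SELECT DECODE(x, 1, 'one', 'other') FROM t", read="oracle")[0]
    "SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END FROM t"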
4430 """ 4431 args = self._parse_csv(self._parse_conjunction) 4432 4433 if len(args) < 3: 4434 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4435 4436 expression, *expressions = args 4437 if not expression: 4438 return None 4439 4440 ifs = [] 4441 for search, result in zip(expressions[::2], expressions[1::2]): 4442 if not search or not result: 4443 return None 4444 4445 if isinstance(search, exp.Literal): 4446 ifs.append( 4447 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4448 ) 4449 elif isinstance(search, exp.Null): 4450 ifs.append( 4451 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4452 ) 4453 else: 4454 cond = exp.or_( 4455 exp.EQ(this=expression.copy(), expression=search), 4456 exp.and_( 4457 exp.Is(this=expression.copy(), expression=exp.Null()), 4458 exp.Is(this=search.copy(), expression=exp.Null()), 4459 copy=False, 4460 ), 4461 copy=False, 4462 ) 4463 ifs.append(exp.If(this=cond, true=result)) 4464 4465 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4466 4467 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4468 self._match_text_seq("KEY") 4469 key = self._parse_column() 4470 self._match_set((TokenType.COLON, TokenType.COMMA)) 4471 self._match_text_seq("VALUE") 4472 value = self._parse_bitwise() 4473 4474 if not key and not value: 4475 return None 4476 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4477 4478 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4479 if not this or not self._match_text_seq("FORMAT", "JSON"): 4480 return this 4481 4482 return self.expression(exp.FormatJson, this=this) 4483 4484 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4485 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4486 for value in values: 4487 if self._match_text_seq(value, "ON", on): 4488 return f"{value} ON {on}" 4489 4490 return None 4491 4492 def _parse_json_object(self) -> exp.JSONObject: 4493 star = self._parse_star() 4494 expressions = ( 4495 [star] 4496 if star 4497 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4498 ) 4499 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4500 4501 unique_keys = None 4502 if self._match_text_seq("WITH", "UNIQUE"): 4503 unique_keys = True 4504 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4505 unique_keys = False 4506 4507 self._match_text_seq("KEYS") 4508 4509 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4510 self._parse_type() 4511 ) 4512 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4513 4514 return self.expression( 4515 exp.JSONObject, 4516 expressions=expressions, 4517 null_handling=null_handling, 4518 unique_keys=unique_keys, 4519 return_type=return_type, 4520 encoding=encoding, 4521 ) 4522 4523 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4524 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4525 if not self._match_text_seq("NESTED"): 4526 this = self._parse_id_var() 4527 kind = self._parse_types(allow_identifiers=False) 4528 nested = None 4529 else: 4530 this = None 4531 kind = None 4532 nested = True 4533 4534 path = self._match_text_seq("PATH") and self._parse_string() 4535 nested_schema = nested and self._parse_json_schema() 4536 4537 return self.expression( 4538 exp.JSONColumnDef, 4539 this=this, 4540 kind=kind, 4541 path=path, 4542 nested_schema=nested_schema, 4543 ) 4544 4545 def _parse_json_schema(self) -> exp.JSONSchema: 4546 self._match_text_seq("COLUMNS") 4547 return self.expression( 4548 exp.JSONSchema, 4549 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4550 ) 4551 4552 def _parse_json_table(self) -> exp.JSONTable: 4553 this = self._parse_format_json(self._parse_bitwise()) 4554 path = self._match(TokenType.COMMA) and self._parse_string() 4555 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4556 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4557 schema = self._parse_json_schema() 4558 4559 return exp.JSONTable( 4560 this=this, 4561 schema=schema, 4562 path=path, 4563 error_handling=error_handling, 4564 empty_handling=empty_handling, 4565 ) 4566 4567 def _parse_logarithm(self) -> exp.Func: 4568 # Default argument order is base, expression 4569 args = self._parse_csv(self._parse_range) 4570 4571 if len(args) > 1: 4572 if not self.LOG_BASE_FIRST: 4573 args.reverse() 4574 return exp.Log.from_arg_list(args) 4575 4576 return self.expression( 4577 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4578 ) 4579 4580 def _parse_match_against(self) -> exp.MatchAgainst: 4581 expressions = self._parse_csv(self._parse_column) 4582 4583 self._match_text_seq(")", "AGAINST", "(") 4584 4585 this = self._parse_string() 4586 4587 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4588 modifier = "IN NATURAL LANGUAGE MODE" 4589 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4590 modifier = f"{modifier} WITH QUERY EXPANSION" 4591 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4592 modifier = "IN BOOLEAN MODE" 4593 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4594 modifier = "WITH QUERY EXPANSION" 4595 else: 4596 modifier = None 4597 4598 return 
self.expression( 4599 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4600 ) 4601 4602 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4603 def _parse_open_json(self) -> exp.OpenJSON: 4604 this = self._parse_bitwise() 4605 path = self._match(TokenType.COMMA) and self._parse_string() 4606 4607 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4608 this = self._parse_field(any_token=True) 4609 kind = self._parse_types() 4610 path = self._parse_string() 4611 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4612 4613 return self.expression( 4614 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4615 ) 4616 4617 expressions = None 4618 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4619 self._match_l_paren() 4620 expressions = self._parse_csv(_parse_open_json_column_def) 4621 4622 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4623 4624 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4625 args = self._parse_csv(self._parse_bitwise) 4626 4627 if self._match(TokenType.IN): 4628 return self.expression( 4629 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4630 ) 4631 4632 if haystack_first: 4633 haystack = seq_get(args, 0) 4634 needle = seq_get(args, 1) 4635 else: 4636 needle = seq_get(args, 0) 4637 haystack = seq_get(args, 1) 4638 4639 return self.expression( 4640 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4641 ) 4642 4643 def _parse_predict(self) -> exp.Predict: 4644 self._match_text_seq("MODEL") 4645 this = self._parse_table() 4646 4647 self._match(TokenType.COMMA) 4648 self._match_text_seq("TABLE") 4649 4650 return self.expression( 4651 exp.Predict, 4652 this=this, 4653 expression=self._parse_table(), 4654 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4655 ) 4656 4657 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4658 args = self._parse_csv(self._parse_table) 4659 return exp.JoinHint(this=func_name.upper(), expressions=args) 4660 4661 def _parse_substring(self) -> exp.Substring: 4662 # Postgres supports the form: substring(string [from int] [for int]) 4663 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4664 4665 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4666 4667 if self._match(TokenType.FROM): 4668 args.append(self._parse_bitwise()) 4669 if self._match(TokenType.FOR): 4670 args.append(self._parse_bitwise()) 4671 4672 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4673 4674 def _parse_trim(self) -> exp.Trim: 4675 # https://www.w3resource.com/sql/character-functions/trim.php 4676 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4677 4678 position = None 4679 collation = None 4680 expression = None 4681 4682 if self._match_texts(self.TRIM_TYPES): 4683 position = self._prev.text.upper() 4684 4685 this = self._parse_bitwise() 4686 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4687 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4688 expression = self._parse_bitwise() 4689 4690 if invert_order: 4691 this, expression = expression, this 4692 4693 if self._match(TokenType.COLLATE): 4694 collation = self._parse_bitwise() 4695 4696 return self.expression( 4697 exp.Trim, this=this, position=position, expression=expression, collation=collation 4698 ) 4699 4700 def 
_parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4701 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4702 4703 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4704 return self._parse_window(self._parse_id_var(), alias=True) 4705 4706 def _parse_respect_or_ignore_nulls( 4707 self, this: t.Optional[exp.Expression] 4708 ) -> t.Optional[exp.Expression]: 4709 if self._match_text_seq("IGNORE", "NULLS"): 4710 return self.expression(exp.IgnoreNulls, this=this) 4711 if self._match_text_seq("RESPECT", "NULLS"): 4712 return self.expression(exp.RespectNulls, this=this) 4713 return this 4714 4715 def _parse_window( 4716 self, this: t.Optional[exp.Expression], alias: bool = False 4717 ) -> t.Optional[exp.Expression]: 4718 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4719 self._match(TokenType.WHERE) 4720 this = self.expression( 4721 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4722 ) 4723 self._match_r_paren() 4724 4725 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4726 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4727 if self._match_text_seq("WITHIN", "GROUP"): 4728 order = self._parse_wrapped(self._parse_order) 4729 this = self.expression(exp.WithinGroup, this=this, expression=order) 4730 4731 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4732 # Some dialects choose to implement and some do not. 4733 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4734 4735 # There is some code above in _parse_lambda that handles 4736 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4737 4738 # The below changes handle 4739 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4740 4741 # Oracle allows both formats 4742 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4743 # and Snowflake chose to do the same for familiarity 4744 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4745 this = self._parse_respect_or_ignore_nulls(this) 4746 4747 # bigquery select from window x AS (partition by ...) 
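# --- Editor's note: illustrative sketch, not part of the upstream source. ---
# Both accepted placements parse to the same shape: the IGNORE/RESPECT NULLS
# wrapper ends up around the function expression inside the exp.Window, and
# the result should round-trip, e.g.:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t").sql()
#     'SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t'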
4748 if alias: 4749 over = None 4750 self._match(TokenType.ALIAS) 4751 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4752 return this 4753 else: 4754 over = self._prev.text.upper() 4755 4756 if not self._match(TokenType.L_PAREN): 4757 return self.expression( 4758 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4759 ) 4760 4761 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4762 4763 first = self._match(TokenType.FIRST) 4764 if self._match_text_seq("LAST"): 4765 first = False 4766 4767 partition, order = self._parse_partition_and_order() 4768 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4769 4770 if kind: 4771 self._match(TokenType.BETWEEN) 4772 start = self._parse_window_spec() 4773 self._match(TokenType.AND) 4774 end = self._parse_window_spec() 4775 4776 spec = self.expression( 4777 exp.WindowSpec, 4778 kind=kind, 4779 start=start["value"], 4780 start_side=start["side"], 4781 end=end["value"], 4782 end_side=end["side"], 4783 ) 4784 else: 4785 spec = None 4786 4787 self._match_r_paren() 4788 4789 window = self.expression( 4790 exp.Window, 4791 this=this, 4792 partition_by=partition, 4793 order=order, 4794 spec=spec, 4795 alias=window_alias, 4796 over=over, 4797 first=first, 4798 ) 4799 4800 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4801 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4802 return self._parse_window(window, alias=alias) 4803 4804 return window 4805 4806 def _parse_partition_and_order( 4807 self, 4808 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4809 return self._parse_partition_by(), self._parse_order() 4810 4811 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4812 self._match(TokenType.BETWEEN) 4813 4814 return { 4815 "value": ( 4816 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4817 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4818 or self._parse_bitwise() 4819 ), 4820 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4821 } 4822 4823 def _parse_alias( 4824 self, this: t.Optional[exp.Expression], explicit: bool = False 4825 ) -> t.Optional[exp.Expression]: 4826 any_token = self._match(TokenType.ALIAS) 4827 4828 if explicit and not any_token: 4829 return this 4830 4831 if self._match(TokenType.L_PAREN): 4832 aliases = self.expression( 4833 exp.Aliases, 4834 this=this, 4835 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4836 ) 4837 self._match_r_paren(aliases) 4838 return aliases 4839 4840 alias = self._parse_id_var(any_token) 4841 4842 if alias: 4843 return self.expression(exp.Alias, this=this, alias=alias) 4844 4845 return this 4846 4847 def _parse_id_var( 4848 self, 4849 any_token: bool = True, 4850 tokens: t.Optional[t.Collection[TokenType]] = None, 4851 ) -> t.Optional[exp.Expression]: 4852 identifier = self._parse_identifier() 4853 4854 if identifier: 4855 return identifier 4856 4857 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4858 quoted = self._prev.token_type == TokenType.STRING 4859 return exp.Identifier(this=self._prev.text, quoted=quoted) 4860 4861 return None 4862 4863 def _parse_string(self) -> t.Optional[exp.Expression]: 4864 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 4865 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 4866 return self._parse_placeholder() 4867 4868 def _parse_string_as_identifier(self) -> 
t.Optional[exp.Identifier]: 4869 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4870 4871 def _parse_number(self) -> t.Optional[exp.Expression]: 4872 if self._match(TokenType.NUMBER): 4873 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4874 return self._parse_placeholder() 4875 4876 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4877 if self._match(TokenType.IDENTIFIER): 4878 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4879 return self._parse_placeholder() 4880 4881 def _parse_var( 4882 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4883 ) -> t.Optional[exp.Expression]: 4884 if ( 4885 (any_token and self._advance_any()) 4886 or self._match(TokenType.VAR) 4887 or (self._match_set(tokens) if tokens else False) 4888 ): 4889 return self.expression(exp.Var, this=self._prev.text) 4890 return self._parse_placeholder() 4891 4892 def _advance_any(self) -> t.Optional[Token]: 4893 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4894 self._advance() 4895 return self._prev 4896 return None 4897 4898 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4899 return self._parse_var() or self._parse_string() 4900 4901 def _parse_null(self) -> t.Optional[exp.Expression]: 4902 if self._match_set(self.NULL_TOKENS): 4903 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4904 return self._parse_placeholder() 4905 4906 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4907 if self._match(TokenType.TRUE): 4908 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4909 if self._match(TokenType.FALSE): 4910 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4911 return self._parse_placeholder() 4912 4913 def _parse_star(self) -> t.Optional[exp.Expression]: 4914 if self._match(TokenType.STAR): 4915 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4916 return self._parse_placeholder() 4917 4918 def _parse_parameter(self) -> exp.Parameter: 4919 def _parse_parameter_part() -> t.Optional[exp.Expression]: 4920 return ( 4921 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 4922 ) 4923 4924 self._match(TokenType.L_BRACE) 4925 this = _parse_parameter_part() 4926 expression = self._match(TokenType.COLON) and _parse_parameter_part() 4927 self._match(TokenType.R_BRACE) 4928 4929 return self.expression(exp.Parameter, this=this, expression=expression) 4930 4931 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4932 if self._match_set(self.PLACEHOLDER_PARSERS): 4933 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4934 if placeholder: 4935 return placeholder 4936 self._advance(-1) 4937 return None 4938 4939 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4940 if not self._match(TokenType.EXCEPT): 4941 return None 4942 if self._match(TokenType.L_PAREN, advance=False): 4943 return self._parse_wrapped_csv(self._parse_column) 4944 4945 except_column = self._parse_column() 4946 return [except_column] if except_column else None 4947 4948 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4949 if not self._match(TokenType.REPLACE): 4950 return None 4951 if self._match(TokenType.L_PAREN, advance=False): 4952 return self._parse_wrapped_csv(self._parse_expression) 4953 4954 replace_expression = self._parse_expression() 4955 return [replace_expression] if replace_expression else None 4956 4957 def _parse_csv( 4958 self, parse_method: 
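# --- Editor's note: illustrative sketch, not part of the upstream source. ---
# _parse_except/_parse_replace above feed BigQuery-style star modifiers; a
# round-trip example (assuming the public API):
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT * EXCEPT (a, b) FROM t", read="bigquery").sql(dialect="bigquery")
#     'SELECT * EXCEPT (a, b) FROM t'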
t.Callable, sep: TokenType = TokenType.COMMA 4959 ) -> t.List[exp.Expression]: 4960 parse_result = parse_method() 4961 items = [parse_result] if parse_result is not None else [] 4962 4963 while self._match(sep): 4964 self._add_comments(parse_result) 4965 parse_result = parse_method() 4966 if parse_result is not None: 4967 items.append(parse_result) 4968 4969 return items 4970 4971 def _parse_tokens( 4972 self, parse_method: t.Callable, expressions: t.Dict 4973 ) -> t.Optional[exp.Expression]: 4974 this = parse_method() 4975 4976 while self._match_set(expressions): 4977 this = self.expression( 4978 expressions[self._prev.token_type], 4979 this=this, 4980 comments=self._prev_comments, 4981 expression=parse_method(), 4982 ) 4983 4984 return this 4985 4986 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4987 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4988 4989 def _parse_wrapped_csv( 4990 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4991 ) -> t.List[exp.Expression]: 4992 return self._parse_wrapped( 4993 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4994 ) 4995 4996 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4997 wrapped = self._match(TokenType.L_PAREN) 4998 if not wrapped and not optional: 4999 self.raise_error("Expecting (") 5000 parse_result = parse_method() 5001 if wrapped: 5002 self._match_r_paren() 5003 return parse_result 5004 5005 def _parse_expressions(self) -> t.List[exp.Expression]: 5006 return self._parse_csv(self._parse_expression) 5007 5008 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5009 return self._parse_select() or self._parse_set_operations( 5010 self._parse_expression() if alias else self._parse_conjunction() 5011 ) 5012 5013 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5014 return self._parse_query_modifiers( 5015 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5016 ) 5017 5018 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5019 this = None 5020 if self._match_texts(self.TRANSACTION_KIND): 5021 this = self._prev.text 5022 5023 self._match_texts(("TRANSACTION", "WORK")) 5024 5025 modes = [] 5026 while True: 5027 mode = [] 5028 while self._match(TokenType.VAR): 5029 mode.append(self._prev.text) 5030 5031 if mode: 5032 modes.append(" ".join(mode)) 5033 if not self._match(TokenType.COMMA): 5034 break 5035 5036 return self.expression(exp.Transaction, this=this, modes=modes) 5037 5038 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5039 chain = None 5040 savepoint = None 5041 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5042 5043 self._match_texts(("TRANSACTION", "WORK")) 5044 5045 if self._match_text_seq("TO"): 5046 self._match_text_seq("SAVEPOINT") 5047 savepoint = self._parse_id_var() 5048 5049 if self._match(TokenType.AND): 5050 chain = not self._match_text_seq("NO") 5051 self._match_text_seq("CHAIN") 5052 5053 if is_rollback: 5054 return self.expression(exp.Rollback, savepoint=savepoint) 5055 5056 return self.expression(exp.Commit, chain=chain) 5057 5058 def _parse_refresh(self) -> exp.Refresh: 5059 self._match(TokenType.TABLE) 5060 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5061 5062 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5063 if not self._match_text_seq("ADD"): 5064 return None 5065 5066 
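# --- Editor's note: illustrative sketch, not part of the upstream source. ---
# _parse_add_column handles the ADD [COLUMN] form used by ALTER TABLE,
# including the Databricks-style FIRST/AFTER positioning noted below; e.g.:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT").sql()
#     'ALTER TABLE t ADD COLUMN c INT'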
        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        # No full match in the trie: rewind to where the search started.
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                # Partial matches don't count: rewind and report failure.
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)

        return node

    def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
        return [
            exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
            for value in values
            if value
        ]
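To see how the private helpers above surface through the public API, here is a minimal sketch (assuming the installed sqlglot package and its default dialect): parsing a MERGE statement exercises _parse_merge and _parse_when_matched, yielding an exp.Merge node with one exp.When child per WHEN clause.

import sqlglot
from sqlglot import exp

# _parse_merge builds an exp.Merge whose `expressions` argument holds
# the exp.When nodes produced by _parse_when_matched.
merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
assert isinstance(merge, exp.Merge)
print(len(list(merge.find_all(exp.When))))  # 2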
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()
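As a brief illustration (a sketch, not part of the module itself), the constructor arguments map directly onto error-handling behavior:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect errors rather than raising on the first one; include up to 10
# messages in the eventual ParseError, each with 50 characters of context.
parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=10)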
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
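A typical call pairs the parser with the Tokenizer imported at the top of this module; roughly:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)
# One syntax tree per semicolon-separated statement.
trees = Parser().parse(tokens, sql=sql)
print(len(trees))  # 2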
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
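For example (a sketch, assuming exp.Select is among the registered EXPRESSION_PARSERS keys):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
tokens = Tokenizer().tokenize(sql)
# Parse the token list specifically as a SELECT statement.
select = Parser().parse_into(exp.Select, tokens, sql=sql)[0]
assert isinstance(select, exp.Select)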
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
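Under ErrorLevel.WARN, for instance, a malformed statement is still returned best-effort and the problems are logged instead of raised (a sketch; parse() invokes check_errors() internally, so it rarely needs to be called by hand):

import logging
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

logging.basicConfig()
sql = "SELECT a FROM"  # missing table name
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(sql), sql=sql)  # logs the error instead of raising
print(len(parser.errors) > 0)  # True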
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
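The structured fields passed to ParseError.new above can be read back from a caught error; roughly:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM"
try:
    # The default ErrorLevel.IMMEDIATE raises on the first error.
    Parser().parse(Tokenizer().tokenize(sql), sql=sql)
except ParseError as e:
    info = e.errors[0]
    print(info["line"], info["col"], info["highlight"])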
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
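A small sketch of direct usage (the helpers exp.column and exp.Literal.number come from sqlglot.expressions):

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
# Build and validate an exp.EQ node without going through the tokenizer.
node = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
print(node.sql())  # a = 1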
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
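For instance, a node missing a mandatory argument fails validation (a sketch, assuming exp.Like requires both `this` and `expression`):

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()  # ErrorLevel.IMMEDIATE by default, so validation raises
try:
    # exp.Like is constructed here without its mandatory `expression` argument.
    parser.validate_expression(exp.Like(this=exp.column("a")))
except ParseError as e:
    print(e)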