sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
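    # Illustrative usage (not part of the upstream module): a minimal, hedged
    # sketch of driving the parser by hand, using only names imported above.
    # The SQL string is an arbitrary example.
    #
    #     >>> from sqlglot.errors import ErrorLevel
    #     >>> from sqlglot.parser import Parser
    #     >>> from sqlglot.tokens import Tokenizer
    #     >>> sql = "SELECT a FROM b"
    #     >>> parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
    #     >>> trees = parser.parse(Tokenizer().tokenize(sql), sql)  # one tree per statement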
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }
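    # Illustrative, hedged sketch (not upstream code): TYPE_TOKENS is what lets
    # _parse_types accept nested/parameterized type syntax, e.g. inside a cast.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CAST(x AS ARRAY<INT>)").to.sql()
    #     'ARRAY<INT>'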
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
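    # Illustrative, hedged sketch (not upstream code): because keyword tokens
    # such as PARTITION live in ID_VAR_TOKENS, they can still be parsed as plain
    # identifiers where the grammar allows it.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT partition FROM t").sql()
    #     'SELECT partition FROM t'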
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
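    # Illustrative, hedged sketch (not upstream code): COLUMN_OPERATORS drives
    # postfix operators on columns. Under the default dialect, '::' builds a
    # cast (STRICT_CAST is True here) and '->' a JSON extraction.
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("x::INT")).__name__
    #     'Cast'
    #     >>> type(sqlglot.parse_one("data -> 'a'")).__name__
    #     'JSONExtract'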
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }
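    # Illustrative, hedged sketch (not upstream code): STATEMENT_PARSERS
    # dispatches on the first token of a statement, so each DDL/DML keyword maps
    # to a dedicated expression type.
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("CREATE TABLE t (a INT)")).__name__
    #     'Create'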
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
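    # Illustrative, hedged sketch (not upstream code): RANGE_PARSERS handles
    # postfix predicates such as BETWEEN, IN and IS once the left-hand operand
    # has been parsed.
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("a BETWEEN 1 AND 2")).__name__
    #     'Between'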
"IMMUTABLE": lambda self: self.expression( 663 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 664 ), 665 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 666 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 667 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 668 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 669 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 670 "LIKE": lambda self: self._parse_create_like(), 671 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 672 "LOCK": lambda self: self._parse_locking(), 673 "LOCKING": lambda self: self._parse_locking(), 674 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 675 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 676 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 677 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 678 "NO": lambda self: self._parse_no_property(), 679 "ON": lambda self: self._parse_on_property(), 680 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 681 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 682 "PARTITION": lambda self: self._parse_partitioned_of(), 683 "PARTITION BY": lambda self: self._parse_partitioned_by(), 684 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 685 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 686 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 687 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 688 "REMOTE": lambda self: self._parse_remote_with_connection(), 689 "RETURNS": lambda self: self._parse_returns(), 690 "ROW": lambda self: self._parse_row(), 691 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 692 "SAMPLE": lambda self: self.expression( 693 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 694 ), 695 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 696 "SETTINGS": lambda self: self.expression( 697 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 698 ), 699 "SORTKEY": lambda self: self._parse_sortkey(), 700 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 701 "STABLE": lambda self: self.expression( 702 exp.StabilityProperty, this=exp.Literal.string("STABLE") 703 ), 704 "STORED": lambda self: self._parse_stored(), 705 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 706 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 707 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 708 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 709 "TO": lambda self: self._parse_to_table(), 710 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 711 "TRANSFORM": lambda self: self.expression( 712 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 713 ), 714 "TTL": lambda self: self._parse_ttl(), 715 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 716 "VOLATILE": lambda self: self._parse_volatile_property(), 717 "WITH": lambda self: self._parse_with_property(), 718 } 719 720 CONSTRAINT_PARSERS = { 721 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 722 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 723 "CASESPECIFIC": lambda self: 
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
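    # Illustrative, hedged sketch (not upstream code): CONSTRAINT_PARSERS covers
    # inline column constraints inside a schema definition.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CREATE TABLE t (id INT PRIMARY KEY, name TEXT NOT NULL)").sql()
    #     'CREATE TABLE t (id INT PRIMARY KEY, name TEXT NOT NULL)'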
"CAST": lambda self: self._parse_cast(self.STRICT_CAST), 804 "CONCAT": lambda self: self._parse_concat(), 805 "CONCAT_WS": lambda self: self._parse_concat_ws(), 806 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 807 "DECODE": lambda self: self._parse_decode(), 808 "EXTRACT": lambda self: self._parse_extract(), 809 "JSON_OBJECT": lambda self: self._parse_json_object(), 810 "JSON_TABLE": lambda self: self._parse_json_table(), 811 "LOG": lambda self: self._parse_logarithm(), 812 "MATCH": lambda self: self._parse_match_against(), 813 "OPENJSON": lambda self: self._parse_open_json(), 814 "POSITION": lambda self: self._parse_position(), 815 "PREDICT": lambda self: self._parse_predict(), 816 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 817 "STRING_AGG": lambda self: self._parse_string_agg(), 818 "SUBSTRING": lambda self: self._parse_substring(), 819 "TRIM": lambda self: self._parse_trim(), 820 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 821 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 822 } 823 824 QUERY_MODIFIER_PARSERS = { 825 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 826 TokenType.WHERE: lambda self: ("where", self._parse_where()), 827 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 828 TokenType.HAVING: lambda self: ("having", self._parse_having()), 829 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 830 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 831 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 832 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 833 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 834 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 835 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 836 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 837 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 838 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 839 TokenType.CLUSTER_BY: lambda self: ( 840 "cluster", 841 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 842 ), 843 TokenType.DISTRIBUTE_BY: lambda self: ( 844 "distribute", 845 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 846 ), 847 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 848 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 849 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 850 } 851 852 SET_PARSERS = { 853 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 854 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 855 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 856 "TRANSACTION": lambda self: self._parse_set_transaction(), 857 } 858 859 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 860 861 TYPE_LITERAL_PARSERS = { 862 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 863 } 864 865 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 866 867 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 868 869 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 870 871 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 872 TRANSACTION_CHARACTERISTICS = { 873 "ISOLATION LEVEL REPEATABLE READ", 874 "ISOLATION LEVEL READ COMMITTED", 
875 "ISOLATION LEVEL READ UNCOMMITTED", 876 "ISOLATION LEVEL SERIALIZABLE", 877 "READ WRITE", 878 "READ ONLY", 879 } 880 881 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 882 883 CLONE_KEYWORDS = {"CLONE", "COPY"} 884 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 885 886 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 887 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 888 889 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 890 891 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 892 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 893 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 894 895 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 896 897 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 898 899 DISTINCT_TOKENS = {TokenType.DISTINCT} 900 901 NULL_TOKENS = {TokenType.NULL} 902 903 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 904 905 STRICT_CAST = True 906 907 # A NULL arg in CONCAT yields NULL by default 908 CONCAT_NULL_OUTPUTS_STRING = False 909 910 PREFIXED_PIVOT_COLUMNS = False 911 IDENTIFY_PIVOT_STRINGS = False 912 913 LOG_BASE_FIRST = True 914 LOG_DEFAULTS_TO_LN = False 915 916 # Whether or not ADD is present for each column added by ALTER TABLE 917 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 918 919 # Whether or not the table sample clause expects CSV syntax 920 TABLESAMPLE_CSV = False 921 922 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 923 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 924 925 # Whether the TRIM function expects the characters to trim as its first argument 926 TRIM_PATTERN_FIRST = False 927 928 # Whether the behavior of a / b depends on the types of a and b. 929 # False means a / b is always float division. 930 # True means a / b is integer division if both a and b are integers. 931 TYPED_DIVISION = False 932 933 # False means 1 / 0 throws an error. 934 # True means 1 / 0 returns null. 
    # False means 1 / 0 throws an error.
    # True means 1 / 0 returns null.
    SAFE_DIVISION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
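    # Illustrative, hedged sketch (not upstream code): parse_into is also
    # reachable through the top-level API via the `into` argument.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> isinstance(sqlglot.parse_one("a AND b", into=exp.Condition), exp.And)
    #     True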
1026 """ 1027 errors = [] 1028 for expression_type in ensure_list(expression_types): 1029 parser = self.EXPRESSION_PARSERS.get(expression_type) 1030 if not parser: 1031 raise TypeError(f"No parser registered for {expression_type}") 1032 1033 try: 1034 return self._parse(parser, raw_tokens, sql) 1035 except ParseError as e: 1036 e.errors[0]["into_expression"] = expression_type 1037 errors.append(e) 1038 1039 raise ParseError( 1040 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1041 errors=merge_errors(errors), 1042 ) from errors[-1] 1043 1044 def _parse( 1045 self, 1046 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1047 raw_tokens: t.List[Token], 1048 sql: t.Optional[str] = None, 1049 ) -> t.List[t.Optional[exp.Expression]]: 1050 self.reset() 1051 self.sql = sql or "" 1052 1053 total = len(raw_tokens) 1054 chunks: t.List[t.List[Token]] = [[]] 1055 1056 for i, token in enumerate(raw_tokens): 1057 if token.token_type == TokenType.SEMICOLON: 1058 if i < total - 1: 1059 chunks.append([]) 1060 else: 1061 chunks[-1].append(token) 1062 1063 expressions = [] 1064 1065 for tokens in chunks: 1066 self._index = -1 1067 self._tokens = tokens 1068 self._advance() 1069 1070 expressions.append(parse_method(self)) 1071 1072 if self._index < len(self._tokens): 1073 self.raise_error("Invalid expression / Unexpected token") 1074 1075 self.check_errors() 1076 1077 return expressions 1078 1079 def check_errors(self) -> None: 1080 """Logs or raises any found errors, depending on the chosen error level setting.""" 1081 if self.error_level == ErrorLevel.WARN: 1082 for error in self.errors: 1083 logger.error(str(error)) 1084 elif self.error_level == ErrorLevel.RAISE and self.errors: 1085 raise ParseError( 1086 concat_messages(self.errors, self.max_errors), 1087 errors=merge_errors(self.errors), 1088 ) 1089 1090 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1091 """ 1092 Appends an error in the list of recorded errors or raises it, depending on the chosen 1093 error level setting. 1094 """ 1095 token = token or self._curr or self._prev or Token.string("") 1096 start = token.start 1097 end = token.end + 1 1098 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1099 highlight = self.sql[start:end] 1100 end_context = self.sql[end : end + self.error_message_context] 1101 1102 error = ParseError.new( 1103 f"{message}. Line {token.line}, Col: {token.col}.\n" 1104 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1105 description=message, 1106 line=token.line, 1107 col=token.col, 1108 start_context=start_context, 1109 highlight=highlight, 1110 end_context=end_context, 1111 ) 1112 1113 if self.error_level == ErrorLevel.IMMEDIATE: 1114 raise error 1115 1116 self.errors.append(error) 1117 1118 def expression( 1119 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1120 ) -> E: 1121 """ 1122 Creates a new, validated Expression. 1123 1124 Args: 1125 exp_class: The expression class to instantiate. 1126 comments: An optional list of comments to attach to the expression. 1127 kwargs: The arguments to set for the expression along with their respective values. 1128 1129 Returns: 1130 The target expression. 
1131 """ 1132 instance = exp_class(**kwargs) 1133 instance.add_comments(comments) if comments else self._add_comments(instance) 1134 return self.validate_expression(instance) 1135 1136 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1137 if expression and self._prev_comments: 1138 expression.add_comments(self._prev_comments) 1139 self._prev_comments = None 1140 1141 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1142 """ 1143 Validates an Expression, making sure that all its mandatory arguments are set. 1144 1145 Args: 1146 expression: The expression to validate. 1147 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1148 1149 Returns: 1150 The validated expression. 1151 """ 1152 if self.error_level != ErrorLevel.IGNORE: 1153 for error_message in expression.error_messages(args): 1154 self.raise_error(error_message) 1155 1156 return expression 1157 1158 def _find_sql(self, start: Token, end: Token) -> str: 1159 return self.sql[start.start : end.end + 1] 1160 1161 def _advance(self, times: int = 1) -> None: 1162 self._index += times 1163 self._curr = seq_get(self._tokens, self._index) 1164 self._next = seq_get(self._tokens, self._index + 1) 1165 1166 if self._index > 0: 1167 self._prev = self._tokens[self._index - 1] 1168 self._prev_comments = self._prev.comments 1169 else: 1170 self._prev = None 1171 self._prev_comments = None 1172 1173 def _retreat(self, index: int) -> None: 1174 if index != self._index: 1175 self._advance(index - self._index) 1176 1177 def _parse_command(self) -> exp.Command: 1178 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1179 1180 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1181 start = self._prev 1182 exists = self._parse_exists() if allow_exists else None 1183 1184 self._match(TokenType.ON) 1185 1186 kind = self._match_set(self.CREATABLES) and self._prev 1187 if not kind: 1188 return self._parse_as_command(start) 1189 1190 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1191 this = self._parse_user_defined_function(kind=kind.token_type) 1192 elif kind.token_type == TokenType.TABLE: 1193 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1194 elif kind.token_type == TokenType.COLUMN: 1195 this = self._parse_column() 1196 else: 1197 this = self._parse_id_var() 1198 1199 self._match(TokenType.IS) 1200 1201 return self.expression( 1202 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1203 ) 1204 1205 def _parse_to_table( 1206 self, 1207 ) -> exp.ToTableProperty: 1208 table = self._parse_table_parts(schema=True) 1209 return self.expression(exp.ToTableProperty, this=table) 1210 1211 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1212 def _parse_ttl(self) -> exp.Expression: 1213 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1214 this = self._parse_bitwise() 1215 1216 if self._match_text_seq("DELETE"): 1217 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1218 if self._match_text_seq("RECOMPRESS"): 1219 return self.expression( 1220 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1221 ) 1222 if self._match_text_seq("TO", "DISK"): 1223 return self.expression( 1224 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1225 ) 1226 if self._match_text_seq("TO", "VOLUME"): 1227 return self.expression( 1228 
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
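    # Illustrative, hedged sketch (not upstream code): _parse_drop produces
    # exp.Drop with flags for IF EXISTS, TEMPORARY, CASCADE, etc.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("DROP TABLE IF EXISTS t").args["exists"]
    #     True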
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self._parse_table(schema=True)
                when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper()
                clone_kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.CLONE_KINDS)
                    and self._prev.text.upper()
                )
                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
                self._match(TokenType.R_PAREN)
                clone = self.expression(
                    exp.Clone,
                    this=clone,
                    when=when,
                    kind=clone_kind,
                    shallow=shallow,
                    expression=clone_expression,
                    copy=copy,
                )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
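    # Illustrative, hedged sketch (not upstream code): _parse_create recognizes
    # OR REPLACE and records it, along with the creatable kind, on exp.Create.
    #
    #     >>> import sqlglot
    #     >>> create = sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1")
    #     >>> create.args["replace"], create.args["kind"]
    #     (True, 'VIEW')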
1448 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1449 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1450 "after": self._match_text_seq("AFTER"), 1451 "minimum": self._match_texts(("MIN", "MINIMUM")), 1452 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1453 } 1454 1455 if self._match_texts(self.PROPERTY_PARSERS): 1456 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1457 try: 1458 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1459 except TypeError: 1460 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1461 1462 return None 1463 1464 def _parse_property(self) -> t.Optional[exp.Expression]: 1465 if self._match_texts(self.PROPERTY_PARSERS): 1466 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1467 1468 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1469 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1470 1471 if self._match_text_seq("COMPOUND", "SORTKEY"): 1472 return self._parse_sortkey(compound=True) 1473 1474 if self._match_text_seq("SQL", "SECURITY"): 1475 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1476 1477 index = self._index 1478 key = self._parse_column() 1479 1480 if not self._match(TokenType.EQ): 1481 self._retreat(index) 1482 return None 1483 1484 return self.expression( 1485 exp.Property, 1486 this=key.to_dot() if isinstance(key, exp.Column) else key, 1487 value=self._parse_column() or self._parse_var(any_token=True), 1488 ) 1489 1490 def _parse_stored(self) -> exp.FileFormatProperty: 1491 self._match(TokenType.ALIAS) 1492 1493 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1494 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1495 1496 return self.expression( 1497 exp.FileFormatProperty, 1498 this=self.expression( 1499 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1500 ) 1501 if input_format or output_format 1502 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1503 ) 1504 1505 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1506 self._match(TokenType.EQ) 1507 self._match(TokenType.ALIAS) 1508 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1509 1510 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1511 properties = [] 1512 while True: 1513 if before: 1514 prop = self._parse_property_before() 1515 else: 1516 prop = self._parse_property() 1517 1518 if not prop: 1519 break 1520 for p in ensure_list(prop): 1521 properties.append(p) 1522 1523 if properties: 1524 return self.expression(exp.Properties, expressions=properties) 1525 1526 return None 1527 1528 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1529 return self.expression( 1530 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1531 ) 1532 1533 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1534 if self._index >= 2: 1535 pre_volatile_token = self._tokens[self._index - 2] 1536 else: 1537 pre_volatile_token = None 1538 1539 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1540 return exp.VolatileProperty() 1541 1542 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1543 1544 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1545 
    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")
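    # Illustrative, hedged sketch: _parse_definer above targets MySQL-style
    # definer clauses, e.g. (SQL, not Python):
    #
    #     CREATE DEFINER = admin@localhost VIEW v AS SELECT 1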
    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
self._parse_csv(self._parse_conjunction) 1778 return [] 1779 1780 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1781 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1782 if self._match_text_seq("MINVALUE"): 1783 return exp.var("MINVALUE") 1784 if self._match_text_seq("MAXVALUE"): 1785 return exp.var("MAXVALUE") 1786 return self._parse_bitwise() 1787 1788 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1789 expression = None 1790 from_expressions = None 1791 to_expressions = None 1792 1793 if self._match(TokenType.IN): 1794 this = self._parse_wrapped_csv(self._parse_bitwise) 1795 elif self._match(TokenType.FROM): 1796 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1797 self._match_text_seq("TO") 1798 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1799 elif self._match_text_seq("WITH", "(", "MODULUS"): 1800 this = self._parse_number() 1801 self._match_text_seq(",", "REMAINDER") 1802 expression = self._parse_number() 1803 self._match_r_paren() 1804 else: 1805 self.raise_error("Failed to parse partition bound spec.") 1806 1807 return self.expression( 1808 exp.PartitionBoundSpec, 1809 this=this, 1810 expression=expression, 1811 from_expressions=from_expressions, 1812 to_expressions=to_expressions, 1813 ) 1814 1815 # https://www.postgresql.org/docs/current/sql-createtable.html 1816 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1817 if not self._match_text_seq("OF"): 1818 self._retreat(self._index - 1) 1819 return None 1820 1821 this = self._parse_table(schema=True) 1822 1823 if self._match(TokenType.DEFAULT): 1824 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1825 elif self._match_text_seq("FOR", "VALUES"): 1826 expression = self._parse_partition_bound_spec() 1827 else: 1828 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1829 1830 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1831 1832 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1833 self._match(TokenType.EQ) 1834 return self.expression( 1835 exp.PartitionedByProperty, 1836 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1837 ) 1838 1839 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1840 if self._match_text_seq("AND", "STATISTICS"): 1841 statistics = True 1842 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1843 statistics = False 1844 else: 1845 statistics = None 1846 1847 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1848 1849 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1850 if self._match_text_seq("PRIMARY", "INDEX"): 1851 return exp.NoPrimaryIndexProperty() 1852 return None 1853 1854 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1855 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1856 return exp.OnCommitProperty() 1857 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1858 return exp.OnCommitProperty(delete=True) 1859 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1860 1861 def _parse_distkey(self) -> exp.DistKeyProperty: 1862 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1863 1864 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1865 table = self._parse_table(schema=True) 1866 1867 options = [] 1868 while self._match_texts(("INCLUDING", "EXCLUDING")): 1869 this = self._prev.text.upper() 1870 
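# Illustrative sketch (editor's annotation, not upstream source): the loop above has
# just captured INCLUDING or EXCLUDING, and the next lines read the option name, so
# _parse_create_like accepts Postgres-style statements such as the following
# (dialect support assumed):
#
#     import sqlglot
#     ast = sqlglot.parse_one(
#         "CREATE TABLE t2 (LIKE t1 INCLUDING DEFAULTS)", read="postgres"
#     )
#     # The resulting LikeProperty holds t1 as `this` and one exp.Property
#     # per INCLUDING/EXCLUDING option in `expressions`.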
1871 id_var = self._parse_id_var() 1872 if not id_var: 1873 return None 1874 1875 options.append( 1876 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1877 ) 1878 1879 return self.expression(exp.LikeProperty, this=table, expressions=options) 1880 1881 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1882 return self.expression( 1883 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1884 ) 1885 1886 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1887 self._match(TokenType.EQ) 1888 return self.expression( 1889 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1890 ) 1891 1892 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1893 self._match_text_seq("WITH", "CONNECTION") 1894 return self.expression( 1895 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1896 ) 1897 1898 def _parse_returns(self) -> exp.ReturnsProperty: 1899 value: t.Optional[exp.Expression] 1900 is_table = self._match(TokenType.TABLE) 1901 1902 if is_table: 1903 if self._match(TokenType.LT): 1904 value = self.expression( 1905 exp.Schema, 1906 this="TABLE", 1907 expressions=self._parse_csv(self._parse_struct_types), 1908 ) 1909 if not self._match(TokenType.GT): 1910 self.raise_error("Expecting >") 1911 else: 1912 value = self._parse_schema(exp.var("TABLE")) 1913 else: 1914 value = self._parse_types() 1915 1916 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1917 1918 def _parse_describe(self) -> exp.Describe: 1919 kind = self._match_set(self.CREATABLES) and self._prev.text 1920 this = self._parse_table(schema=True) 1921 properties = self._parse_properties() 1922 expressions = properties.expressions if properties else None 1923 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1924 1925 def _parse_insert(self) -> exp.Insert: 1926 comments = ensure_list(self._prev_comments) 1927 overwrite = self._match(TokenType.OVERWRITE) 1928 ignore = self._match(TokenType.IGNORE) 1929 local = self._match_text_seq("LOCAL") 1930 alternative = None 1931 1932 if self._match_text_seq("DIRECTORY"): 1933 this: t.Optional[exp.Expression] = self.expression( 1934 exp.Directory, 1935 this=self._parse_var_or_string(), 1936 local=local, 1937 row_format=self._parse_row_format(match_row=True), 1938 ) 1939 else: 1940 if self._match(TokenType.OR): 1941 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1942 1943 self._match(TokenType.INTO) 1944 comments += ensure_list(self._prev_comments) 1945 self._match(TokenType.TABLE) 1946 this = self._parse_table(schema=True) 1947 1948 returning = self._parse_returning() 1949 1950 return self.expression( 1951 exp.Insert, 1952 comments=comments, 1953 this=this, 1954 by_name=self._match_text_seq("BY", "NAME"), 1955 exists=self._parse_exists(), 1956 partition=self._parse_partition(), 1957 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1958 and self._parse_conjunction(), 1959 expression=self._parse_ddl_select(), 1960 conflict=self._parse_on_conflict(), 1961 returning=returning or self._parse_returning(), 1962 overwrite=overwrite, 1963 alternative=alternative, 1964 ignore=ignore, 1965 ) 1966 1967 def _parse_kill(self) -> exp.Kill: 1968 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1969 1970 return self.expression( 1971 exp.Kill, 1972 this=self._parse_primary(), 1973 kind=kind, 1974 ) 1975 1976 def 
_parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1977 conflict = self._match_text_seq("ON", "CONFLICT") 1978 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1979 1980 if not conflict and not duplicate: 1981 return None 1982 1983 nothing = None 1984 expressions = None 1985 key = None 1986 constraint = None 1987 1988 if conflict: 1989 if self._match_text_seq("ON", "CONSTRAINT"): 1990 constraint = self._parse_id_var() 1991 else: 1992 key = self._parse_csv(self._parse_value) 1993 1994 self._match_text_seq("DO") 1995 if self._match_text_seq("NOTHING"): 1996 nothing = True 1997 else: 1998 self._match(TokenType.UPDATE) 1999 self._match(TokenType.SET) 2000 expressions = self._parse_csv(self._parse_equality) 2001 2002 return self.expression( 2003 exp.OnConflict, 2004 duplicate=duplicate, 2005 expressions=expressions, 2006 nothing=nothing, 2007 key=key, 2008 constraint=constraint, 2009 ) 2010 2011 def _parse_returning(self) -> t.Optional[exp.Returning]: 2012 if not self._match(TokenType.RETURNING): 2013 return None 2014 return self.expression( 2015 exp.Returning, 2016 expressions=self._parse_csv(self._parse_expression), 2017 into=self._match(TokenType.INTO) and self._parse_table_part(), 2018 ) 2019 2020 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2021 if not self._match(TokenType.FORMAT): 2022 return None 2023 return self._parse_row_format() 2024 2025 def _parse_row_format( 2026 self, match_row: bool = False 2027 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2028 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2029 return None 2030 2031 if self._match_text_seq("SERDE"): 2032 this = self._parse_string() 2033 2034 serde_properties = None 2035 if self._match(TokenType.SERDE_PROPERTIES): 2036 serde_properties = self.expression( 2037 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2038 ) 2039 2040 return self.expression( 2041 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2042 ) 2043 2044 self._match_text_seq("DELIMITED") 2045 2046 kwargs = {} 2047 2048 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2049 kwargs["fields"] = self._parse_string() 2050 if self._match_text_seq("ESCAPED", "BY"): 2051 kwargs["escaped"] = self._parse_string() 2052 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2053 kwargs["collection_items"] = self._parse_string() 2054 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2055 kwargs["map_keys"] = self._parse_string() 2056 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2057 kwargs["lines"] = self._parse_string() 2058 if self._match_text_seq("NULL", "DEFINED", "AS"): 2059 kwargs["null"] = self._parse_string() 2060 2061 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2062 2063 def _parse_load(self) -> exp.LoadData | exp.Command: 2064 if self._match_text_seq("DATA"): 2065 local = self._match_text_seq("LOCAL") 2066 self._match_text_seq("INPATH") 2067 inpath = self._parse_string() 2068 overwrite = self._match(TokenType.OVERWRITE) 2069 self._match_pair(TokenType.INTO, TokenType.TABLE) 2070 2071 return self.expression( 2072 exp.LoadData, 2073 this=self._parse_table(schema=True), 2074 local=local, 2075 overwrite=overwrite, 2076 inpath=inpath, 2077 partition=self._parse_partition(), 2078 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2079 serde=self._match_text_seq("SERDE") and self._parse_string(), 2080 ) 
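# Illustrative sketch (editor's annotation, not upstream source): the branch above
# builds a structured exp.LoadData node for the Hive-style statement; any other LOAD
# form falls through to the generic command fallback on the next line. Assuming the
# Hive dialect handles this form:
#
#     import sqlglot
#     ast = sqlglot.parse_one(
#         "LOAD DATA LOCAL INPATH '/data/x' OVERWRITE INTO TABLE t", read="hive"
#     )
#     # -> exp.LoadData with local=True, overwrite=True, inpath='/data/x'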
2081 return self._parse_as_command(self._prev) 2082 2083 def _parse_delete(self) -> exp.Delete: 2084 # This handles MySQL's "Multiple-Table Syntax" 2085 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2086 tables = None 2087 comments = self._prev_comments 2088 if not self._match(TokenType.FROM, advance=False): 2089 tables = self._parse_csv(self._parse_table) or None 2090 2091 returning = self._parse_returning() 2092 2093 return self.expression( 2094 exp.Delete, 2095 comments=comments, 2096 tables=tables, 2097 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2098 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2099 where=self._parse_where(), 2100 returning=returning or self._parse_returning(), 2101 limit=self._parse_limit(), 2102 ) 2103 2104 def _parse_update(self) -> exp.Update: 2105 comments = self._prev_comments 2106 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2107 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2108 returning = self._parse_returning() 2109 return self.expression( 2110 exp.Update, 2111 comments=comments, 2112 **{ # type: ignore 2113 "this": this, 2114 "expressions": expressions, 2115 "from": self._parse_from(joins=True), 2116 "where": self._parse_where(), 2117 "returning": returning or self._parse_returning(), 2118 "order": self._parse_order(), 2119 "limit": self._parse_limit(), 2120 }, 2121 ) 2122 2123 def _parse_uncache(self) -> exp.Uncache: 2124 if not self._match(TokenType.TABLE): 2125 self.raise_error("Expecting TABLE after UNCACHE") 2126 2127 return self.expression( 2128 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2129 ) 2130 2131 def _parse_cache(self) -> exp.Cache: 2132 lazy = self._match_text_seq("LAZY") 2133 self._match(TokenType.TABLE) 2134 table = self._parse_table(schema=True) 2135 2136 options = [] 2137 if self._match_text_seq("OPTIONS"): 2138 self._match_l_paren() 2139 k = self._parse_string() 2140 self._match(TokenType.EQ) 2141 v = self._parse_string() 2142 options = [k, v] 2143 self._match_r_paren() 2144 2145 self._match(TokenType.ALIAS) 2146 return self.expression( 2147 exp.Cache, 2148 this=table, 2149 lazy=lazy, 2150 options=options, 2151 expression=self._parse_select(nested=True), 2152 ) 2153 2154 def _parse_partition(self) -> t.Optional[exp.Partition]: 2155 if not self._match(TokenType.PARTITION): 2156 return None 2157 2158 return self.expression( 2159 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2160 ) 2161 2162 def _parse_value(self) -> exp.Tuple: 2163 if self._match(TokenType.L_PAREN): 2164 expressions = self._parse_csv(self._parse_conjunction) 2165 self._match_r_paren() 2166 return self.expression(exp.Tuple, expressions=expressions) 2167 2168 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
2169 # https://prestodb.io/docs/current/sql/values.html 2170 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2171 2172 def _parse_projections(self) -> t.List[exp.Expression]: 2173 return self._parse_expressions() 2174 2175 def _parse_select( 2176 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2177 ) -> t.Optional[exp.Expression]: 2178 cte = self._parse_with() 2179 2180 if cte: 2181 this = self._parse_statement() 2182 2183 if not this: 2184 self.raise_error("Failed to parse any statement following CTE") 2185 return cte 2186 2187 if "with" in this.arg_types: 2188 this.set("with", cte) 2189 else: 2190 self.raise_error(f"{this.key} does not support CTE") 2191 this = cte 2192 2193 return this 2194 2195 # duckdb supports leading with FROM x 2196 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2197 2198 if self._match(TokenType.SELECT): 2199 comments = self._prev_comments 2200 2201 hint = self._parse_hint() 2202 all_ = self._match(TokenType.ALL) 2203 distinct = self._match_set(self.DISTINCT_TOKENS) 2204 2205 kind = ( 2206 self._match(TokenType.ALIAS) 2207 and self._match_texts(("STRUCT", "VALUE")) 2208 and self._prev.text 2209 ) 2210 2211 if distinct: 2212 distinct = self.expression( 2213 exp.Distinct, 2214 on=self._parse_value() if self._match(TokenType.ON) else None, 2215 ) 2216 2217 if all_ and distinct: 2218 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2219 2220 limit = self._parse_limit(top=True) 2221 projections = self._parse_projections() 2222 2223 this = self.expression( 2224 exp.Select, 2225 kind=kind, 2226 hint=hint, 2227 distinct=distinct, 2228 expressions=projections, 2229 limit=limit, 2230 ) 2231 this.comments = comments 2232 2233 into = self._parse_into() 2234 if into: 2235 this.set("into", into) 2236 2237 if not from_: 2238 from_ = self._parse_from() 2239 2240 if from_: 2241 this.set("from", from_) 2242 2243 this = self._parse_query_modifiers(this) 2244 elif (table or nested) and self._match(TokenType.L_PAREN): 2245 if self._match(TokenType.PIVOT): 2246 this = self._parse_simplified_pivot() 2247 elif self._match(TokenType.FROM): 2248 this = exp.select("*").from_( 2249 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2250 ) 2251 else: 2252 this = self._parse_table() if table else self._parse_select(nested=True) 2253 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2254 2255 self._match_r_paren() 2256 2257 # We return early here so that the UNION isn't attached to the subquery by the 2258 # following call to _parse_set_operations, but instead becomes the parent node 2259 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2260 elif self._match(TokenType.VALUES): 2261 this = self.expression( 2262 exp.Values, 2263 expressions=self._parse_csv(self._parse_value), 2264 alias=self._parse_table_alias(), 2265 ) 2266 elif from_: 2267 this = exp.select("*").from_(from_.this, copy=False) 2268 else: 2269 this = None 2270 2271 return self._parse_set_operations(this) 2272 2273 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2274 if not skip_with_token and not self._match(TokenType.WITH): 2275 return None 2276 2277 comments = self._prev_comments 2278 recursive = self._match(TokenType.RECURSIVE) 2279 2280 expressions = [] 2281 while True: 2282 expressions.append(self._parse_cte()) 2283 2284 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2285 break 2286 else: 2287 
self._match(TokenType.WITH) 2288 2289 return self.expression( 2290 exp.With, comments=comments, expressions=expressions, recursive=recursive 2291 ) 2292 2293 def _parse_cte(self) -> exp.CTE: 2294 alias = self._parse_table_alias() 2295 if not alias or not alias.this: 2296 self.raise_error("Expected CTE to have alias") 2297 2298 self._match(TokenType.ALIAS) 2299 return self.expression( 2300 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2301 ) 2302 2303 def _parse_table_alias( 2304 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2305 ) -> t.Optional[exp.TableAlias]: 2306 any_token = self._match(TokenType.ALIAS) 2307 alias = ( 2308 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2309 or self._parse_string_as_identifier() 2310 ) 2311 2312 index = self._index 2313 if self._match(TokenType.L_PAREN): 2314 columns = self._parse_csv(self._parse_function_parameter) 2315 self._match_r_paren() if columns else self._retreat(index) 2316 else: 2317 columns = None 2318 2319 if not alias and not columns: 2320 return None 2321 2322 return self.expression(exp.TableAlias, this=alias, columns=columns) 2323 2324 def _parse_subquery( 2325 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2326 ) -> t.Optional[exp.Subquery]: 2327 if not this: 2328 return None 2329 2330 return self.expression( 2331 exp.Subquery, 2332 this=this, 2333 pivots=self._parse_pivots(), 2334 alias=self._parse_table_alias() if parse_alias else None, 2335 ) 2336 2337 def _parse_query_modifiers( 2338 self, this: t.Optional[exp.Expression] 2339 ) -> t.Optional[exp.Expression]: 2340 if isinstance(this, self.MODIFIABLES): 2341 for join in iter(self._parse_join, None): 2342 this.append("joins", join) 2343 for lateral in iter(self._parse_lateral, None): 2344 this.append("laterals", lateral) 2345 2346 while True: 2347 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2348 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2349 key, expression = parser(self) 2350 2351 if expression: 2352 this.set(key, expression) 2353 if key == "limit": 2354 offset = expression.args.pop("offset", None) 2355 if offset: 2356 this.set("offset", exp.Offset(expression=offset)) 2357 continue 2358 break 2359 return this 2360 2361 def _parse_hint(self) -> t.Optional[exp.Hint]: 2362 if self._match(TokenType.HINT): 2363 hints = [] 2364 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2365 hints.extend(hint) 2366 2367 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2368 self.raise_error("Expected */ after HINT") 2369 2370 return self.expression(exp.Hint, expressions=hints) 2371 2372 return None 2373 2374 def _parse_into(self) -> t.Optional[exp.Into]: 2375 if not self._match(TokenType.INTO): 2376 return None 2377 2378 temp = self._match(TokenType.TEMPORARY) 2379 unlogged = self._match_text_seq("UNLOGGED") 2380 self._match(TokenType.TABLE) 2381 2382 return self.expression( 2383 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2384 ) 2385 2386 def _parse_from( 2387 self, joins: bool = False, skip_from_token: bool = False 2388 ) -> t.Optional[exp.From]: 2389 if not skip_from_token and not self._match(TokenType.FROM): 2390 return None 2391 2392 return self.expression( 2393 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2394 ) 2395 2396 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2397 if not self._match(TokenType.MATCH_RECOGNIZE): 2398 return None 2399 2400 
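# Illustrative sketch (editor's annotation, not upstream source): from here the
# MATCH_RECOGNIZE body is consumed positionally: PARTITION BY, ORDER BY, MEASURES,
# the ROWS PER MATCH mode, AFTER MATCH SKIP, PATTERN, and DEFINE. A statement this
# method is meant to accept (Snowflake dialect assumed):
#
#     import sqlglot
#     ast = sqlglot.parse_one(
#         "SELECT * FROM t MATCH_RECOGNIZE ("
#         " PARTITION BY a ORDER BY b"
#         " MEASURES FIRST(c) AS fc"
#         " ONE ROW PER MATCH"
#         " AFTER MATCH SKIP PAST LAST ROW"
#         " PATTERN (x+) DEFINE x AS c > 0"
#         ") AS mr",
#         read="snowflake",
#     )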
self._match_l_paren() 2401 2402 partition = self._parse_partition_by() 2403 order = self._parse_order() 2404 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2405 2406 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2407 rows = exp.var("ONE ROW PER MATCH") 2408 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2409 text = "ALL ROWS PER MATCH" 2410 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2411 text += f" SHOW EMPTY MATCHES" 2412 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2413 text += f" OMIT EMPTY MATCHES" 2414 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2415 text += f" WITH UNMATCHED ROWS" 2416 rows = exp.var(text) 2417 else: 2418 rows = None 2419 2420 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2421 text = "AFTER MATCH SKIP" 2422 if self._match_text_seq("PAST", "LAST", "ROW"): 2423 text += f" PAST LAST ROW" 2424 elif self._match_text_seq("TO", "NEXT", "ROW"): 2425 text += f" TO NEXT ROW" 2426 elif self._match_text_seq("TO", "FIRST"): 2427 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2428 elif self._match_text_seq("TO", "LAST"): 2429 text += f" TO LAST {self._advance_any().text}" # type: ignore 2430 after = exp.var(text) 2431 else: 2432 after = None 2433 2434 if self._match_text_seq("PATTERN"): 2435 self._match_l_paren() 2436 2437 if not self._curr: 2438 self.raise_error("Expecting )", self._curr) 2439 2440 paren = 1 2441 start = self._curr 2442 2443 while self._curr and paren > 0: 2444 if self._curr.token_type == TokenType.L_PAREN: 2445 paren += 1 2446 if self._curr.token_type == TokenType.R_PAREN: 2447 paren -= 1 2448 2449 end = self._prev 2450 self._advance() 2451 2452 if paren > 0: 2453 self.raise_error("Expecting )", self._curr) 2454 2455 pattern = exp.var(self._find_sql(start, end)) 2456 else: 2457 pattern = None 2458 2459 define = ( 2460 self._parse_csv( 2461 lambda: self.expression( 2462 exp.Alias, 2463 alias=self._parse_id_var(any_token=True), 2464 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2465 ) 2466 ) 2467 if self._match_text_seq("DEFINE") 2468 else None 2469 ) 2470 2471 self._match_r_paren() 2472 2473 return self.expression( 2474 exp.MatchRecognize, 2475 partition_by=partition, 2476 order=order, 2477 measures=measures, 2478 rows=rows, 2479 after=after, 2480 pattern=pattern, 2481 define=define, 2482 alias=self._parse_table_alias(), 2483 ) 2484 2485 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2486 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2487 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2488 2489 if outer_apply or cross_apply: 2490 this = self._parse_select(table=True) 2491 view = None 2492 outer = not cross_apply 2493 elif self._match(TokenType.LATERAL): 2494 this = self._parse_select(table=True) 2495 view = self._match(TokenType.VIEW) 2496 outer = self._match(TokenType.OUTER) 2497 else: 2498 return None 2499 2500 if not this: 2501 this = ( 2502 self._parse_unnest() 2503 or self._parse_function() 2504 or self._parse_id_var(any_token=False) 2505 ) 2506 2507 while self._match(TokenType.DOT): 2508 this = exp.Dot( 2509 this=this, 2510 expression=self._parse_function() or self._parse_id_var(any_token=False), 2511 ) 2512 2513 if view: 2514 table = self._parse_id_var(any_token=False) 2515 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2516 table_alias: t.Optional[exp.TableAlias] = self.expression( 2517 exp.TableAlias, this=table, columns=columns 2518 ) 2519 elif 
isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2520 # We move the alias from the lateral's child node to the lateral itself 2521 table_alias = this.args["alias"].pop() 2522 else: 2523 table_alias = self._parse_table_alias() 2524 2525 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2526 2527 def _parse_join_parts( 2528 self, 2529 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2530 return ( 2531 self._match_set(self.JOIN_METHODS) and self._prev, 2532 self._match_set(self.JOIN_SIDES) and self._prev, 2533 self._match_set(self.JOIN_KINDS) and self._prev, 2534 ) 2535 2536 def _parse_join( 2537 self, skip_join_token: bool = False, parse_bracket: bool = False 2538 ) -> t.Optional[exp.Join]: 2539 if self._match(TokenType.COMMA): 2540 return self.expression(exp.Join, this=self._parse_table()) 2541 2542 index = self._index 2543 method, side, kind = self._parse_join_parts() 2544 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2545 join = self._match(TokenType.JOIN) 2546 2547 if not skip_join_token and not join: 2548 self._retreat(index) 2549 kind = None 2550 method = None 2551 side = None 2552 2553 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2554 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2555 2556 if not skip_join_token and not join and not outer_apply and not cross_apply: 2557 return None 2558 2559 if outer_apply: 2560 side = Token(TokenType.LEFT, "LEFT") 2561 2562 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2563 2564 if method: 2565 kwargs["method"] = method.text 2566 if side: 2567 kwargs["side"] = side.text 2568 if kind: 2569 kwargs["kind"] = kind.text 2570 if hint: 2571 kwargs["hint"] = hint 2572 2573 if self._match(TokenType.ON): 2574 kwargs["on"] = self._parse_conjunction() 2575 elif self._match(TokenType.USING): 2576 kwargs["using"] = self._parse_wrapped_id_vars() 2577 elif not (kind and kind.token_type == TokenType.CROSS): 2578 index = self._index 2579 join = self._parse_join() 2580 2581 if join and self._match(TokenType.ON): 2582 kwargs["on"] = self._parse_conjunction() 2583 elif join and self._match(TokenType.USING): 2584 kwargs["using"] = self._parse_wrapped_id_vars() 2585 else: 2586 join = None 2587 self._retreat(index) 2588 2589 kwargs["this"].set("joins", [join] if join else None) 2590 2591 comments = [c for token in (method, side, kind) if token for c in token.comments] 2592 return self.expression(exp.Join, comments=comments, **kwargs) 2593 2594 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2595 this = self._parse_conjunction() 2596 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2597 return this 2598 2599 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2600 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2601 2602 return this 2603 2604 def _parse_index( 2605 self, 2606 index: t.Optional[exp.Expression] = None, 2607 ) -> t.Optional[exp.Index]: 2608 if index: 2609 unique = None 2610 primary = None 2611 amp = None 2612 2613 self._match(TokenType.ON) 2614 self._match(TokenType.TABLE) # hive 2615 table = self._parse_table_parts(schema=True) 2616 else: 2617 unique = self._match(TokenType.UNIQUE) 2618 primary = self._match_text_seq("PRIMARY") 2619 amp = self._match_text_seq("AMP") 2620 2621 if not self._match(TokenType.INDEX): 2622 return None 2623 2624 index = self._parse_id_var() 2625 table = None 2626 2627 
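# Illustrative sketch (editor's annotation, not upstream source): at this point the
# index name (or, for the Hive form, the ON <table> part) has been consumed; the
# lines below pick up USING, the wrapped column list, and any trailing
# PARTITION BY / WHERE, e.g.:
#
#     import sqlglot
#     ast = sqlglot.parse_one("CREATE UNIQUE INDEX idx ON t (a, b)")
#     # -> an exp.Create wrapping an exp.Index with unique=True and the two
#     #    columns parsed as exp.Ordered expressions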
using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2628 2629 if self._match(TokenType.L_PAREN, advance=False): 2630 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2631 else: 2632 columns = None 2633 2634 return self.expression( 2635 exp.Index, 2636 this=index, 2637 table=table, 2638 using=using, 2639 columns=columns, 2640 unique=unique, 2641 primary=primary, 2642 amp=amp, 2643 partition_by=self._parse_partition_by(), 2644 where=self._parse_where(), 2645 ) 2646 2647 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2648 hints: t.List[exp.Expression] = [] 2649 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2650 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2651 hints.append( 2652 self.expression( 2653 exp.WithTableHint, 2654 expressions=self._parse_csv( 2655 lambda: self._parse_function() or self._parse_var(any_token=True) 2656 ), 2657 ) 2658 ) 2659 self._match_r_paren() 2660 else: 2661 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2662 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2663 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2664 2665 self._match_texts(("INDEX", "KEY")) 2666 if self._match(TokenType.FOR): 2667 hint.set("target", self._advance_any() and self._prev.text.upper()) 2668 2669 hint.set("expressions", self._parse_wrapped_id_vars()) 2670 hints.append(hint) 2671 2672 return hints or None 2673 2674 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2675 return ( 2676 (not schema and self._parse_function(optional_parens=False)) 2677 or self._parse_id_var(any_token=False) 2678 or self._parse_string_as_identifier() 2679 or self._parse_placeholder() 2680 ) 2681 2682 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2683 catalog = None 2684 db = None 2685 table = self._parse_table_part(schema=schema) 2686 2687 while self._match(TokenType.DOT): 2688 if catalog: 2689 # This allows nesting the table in arbitrarily many dot expressions if needed 2690 table = self.expression( 2691 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2692 ) 2693 else: 2694 catalog = db 2695 db = table 2696 table = self._parse_table_part(schema=schema) 2697 2698 if not table: 2699 self.raise_error(f"Expected table name but got {self._curr}") 2700 2701 return self.expression( 2702 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2703 ) 2704 2705 def _parse_table( 2706 self, 2707 schema: bool = False, 2708 joins: bool = False, 2709 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2710 parse_bracket: bool = False, 2711 ) -> t.Optional[exp.Expression]: 2712 lateral = self._parse_lateral() 2713 if lateral: 2714 return lateral 2715 2716 unnest = self._parse_unnest() 2717 if unnest: 2718 return unnest 2719 2720 values = self._parse_derived_table_values() 2721 if values: 2722 return values 2723 2724 subquery = self._parse_select(table=True) 2725 if subquery: 2726 if not subquery.args.get("pivots"): 2727 subquery.set("pivots", self._parse_pivots()) 2728 return subquery 2729 2730 bracket = parse_bracket and self._parse_bracket(None) 2731 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2732 this = t.cast( 2733 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2734 ) 2735 2736 if schema: 2737 return self._parse_schema(this=this) 2738 2739 version = self._parse_version() 2740 2741 if 
version: 2742 this.set("version", version) 2743 2744 if self.ALIAS_POST_TABLESAMPLE: 2745 table_sample = self._parse_table_sample() 2746 2747 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2748 if alias: 2749 this.set("alias", alias) 2750 2751 if self._match_text_seq("AT"): 2752 this.set("index", self._parse_id_var()) 2753 2754 this.set("hints", self._parse_table_hints()) 2755 2756 if not this.args.get("pivots"): 2757 this.set("pivots", self._parse_pivots()) 2758 2759 if not self.ALIAS_POST_TABLESAMPLE: 2760 table_sample = self._parse_table_sample() 2761 2762 if table_sample: 2763 table_sample.set("this", this) 2764 this = table_sample 2765 2766 if joins: 2767 for join in iter(self._parse_join, None): 2768 this.append("joins", join) 2769 2770 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2771 this.set("ordinality", True) 2772 this.set("alias", self._parse_table_alias()) 2773 2774 return this 2775 2776 def _parse_version(self) -> t.Optional[exp.Version]: 2777 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2778 this = "TIMESTAMP" 2779 elif self._match(TokenType.VERSION_SNAPSHOT): 2780 this = "VERSION" 2781 else: 2782 return None 2783 2784 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2785 kind = self._prev.text.upper() 2786 start = self._parse_bitwise() 2787 self._match_texts(("TO", "AND")) 2788 end = self._parse_bitwise() 2789 expression: t.Optional[exp.Expression] = self.expression( 2790 exp.Tuple, expressions=[start, end] 2791 ) 2792 elif self._match_text_seq("CONTAINED", "IN"): 2793 kind = "CONTAINED IN" 2794 expression = self.expression( 2795 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2796 ) 2797 elif self._match(TokenType.ALL): 2798 kind = "ALL" 2799 expression = None 2800 else: 2801 self._match_text_seq("AS", "OF") 2802 kind = "AS OF" 2803 expression = self._parse_type() 2804 2805 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2806 2807 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2808 if not self._match(TokenType.UNNEST): 2809 return None 2810 2811 expressions = self._parse_wrapped_csv(self._parse_equality) 2812 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2813 2814 alias = self._parse_table_alias() if with_alias else None 2815 2816 if alias: 2817 if self.UNNEST_COLUMN_ONLY: 2818 if alias.args.get("columns"): 2819 self.raise_error("Unexpected extra column alias in unnest.") 2820 2821 alias.set("columns", [alias.this]) 2822 alias.set("this", None) 2823 2824 columns = alias.args.get("columns") or [] 2825 if offset and len(expressions) < len(columns): 2826 offset = columns.pop() 2827 2828 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2829 self._match(TokenType.ALIAS) 2830 offset = self._parse_id_var( 2831 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2832 ) or exp.to_identifier("offset") 2833 2834 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2835 2836 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2837 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2838 if not is_derived and not self._match(TokenType.VALUES): 2839 return None 2840 2841 expressions = self._parse_csv(self._parse_value) 2842 alias = self._parse_table_alias() 2843 2844 if is_derived: 2845 self._match_r_paren() 2846 2847 return self.expression( 2848 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2849 ) 2850 2851 
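# Illustrative sketch (editor's annotation, not upstream source): the method above
# turns a parenthesized VALUES list used as a relation into exp.Values, including
# any alias with column names:
#
#     import sqlglot
#     ast = sqlglot.parse_one("SELECT x.a FROM (VALUES (1), (2)) AS x(a)")
#     # _parse_derived_table_values produces exp.Values with a TableAlias
#     # carrying both the table name x and the column name a.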
def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2852 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2853 as_modifier and self._match_text_seq("USING", "SAMPLE") 2854 ): 2855 return None 2856 2857 bucket_numerator = None 2858 bucket_denominator = None 2859 bucket_field = None 2860 percent = None 2861 rows = None 2862 size = None 2863 seed = None 2864 2865 kind = ( 2866 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2867 ) 2868 method = self._parse_var(tokens=(TokenType.ROW,)) 2869 2870 matched_l_paren = self._match(TokenType.L_PAREN) 2871 2872 if self.TABLESAMPLE_CSV: 2873 num = None 2874 expressions = self._parse_csv(self._parse_primary) 2875 else: 2876 expressions = None 2877 num = ( 2878 self._parse_factor() 2879 if self._match(TokenType.NUMBER, advance=False) 2880 else self._parse_primary() 2881 ) 2882 2883 if self._match_text_seq("BUCKET"): 2884 bucket_numerator = self._parse_number() 2885 self._match_text_seq("OUT", "OF") 2886 bucket_denominator = self._parse_number() 2887 self._match(TokenType.ON) 2888 bucket_field = self._parse_field() 2889 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2890 percent = num 2891 elif self._match(TokenType.ROWS): 2892 rows = num 2893 elif num: 2894 size = num 2895 2896 if matched_l_paren: 2897 self._match_r_paren() 2898 2899 if self._match(TokenType.L_PAREN): 2900 method = self._parse_var() 2901 seed = self._match(TokenType.COMMA) and self._parse_number() 2902 self._match_r_paren() 2903 elif self._match_texts(("SEED", "REPEATABLE")): 2904 seed = self._parse_wrapped(self._parse_number) 2905 2906 return self.expression( 2907 exp.TableSample, 2908 expressions=expressions, 2909 method=method, 2910 bucket_numerator=bucket_numerator, 2911 bucket_denominator=bucket_denominator, 2912 bucket_field=bucket_field, 2913 percent=percent, 2914 rows=rows, 2915 size=size, 2916 seed=seed, 2917 kind=kind, 2918 ) 2919 2920 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2921 return list(iter(self._parse_pivot, None)) or None 2922 2923 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2924 return list(iter(self._parse_join, None)) or None 2925 2926 # https://duckdb.org/docs/sql/statements/pivot 2927 def _parse_simplified_pivot(self) -> exp.Pivot: 2928 def _parse_on() -> t.Optional[exp.Expression]: 2929 this = self._parse_bitwise() 2930 return self._parse_in(this) if self._match(TokenType.IN) else this 2931 2932 this = self._parse_table() 2933 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2934 using = self._match(TokenType.USING) and self._parse_csv( 2935 lambda: self._parse_alias(self._parse_function()) 2936 ) 2937 group = self._parse_group() 2938 return self.expression( 2939 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2940 ) 2941 2942 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2943 index = self._index 2944 include_nulls = None 2945 2946 if self._match(TokenType.PIVOT): 2947 unpivot = False 2948 elif self._match(TokenType.UNPIVOT): 2949 unpivot = True 2950 2951 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2952 if self._match_text_seq("INCLUDE", "NULLS"): 2953 include_nulls = True 2954 elif self._match_text_seq("EXCLUDE", "NULLS"): 2955 include_nulls = False 2956 else: 2957 return None 2958 2959 expressions = [] 2960 field = None 2961 2962 if not self._match(TokenType.L_PAREN): 2963 self._retreat(index) 2964 return None 2965 2966
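# Illustrative sketch (editor's annotation, not upstream source): the branch below
# parses the parenthesized body: a list of aggregations for PIVOT, a list of plain
# columns for UNPIVOT, then FOR <column> IN (<values>). For example (Snowflake
# dialect assumed):
#
#     import sqlglot
#     ast = sqlglot.parse_one(
#         "SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a', 'b'))", read="snowflake"
#     )
#     # -> the exp.Pivot is attached to table t under its `pivots` arg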
if unpivot: 2967 expressions = self._parse_csv(self._parse_column) 2968 else: 2969 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2970 2971 if not expressions: 2972 self.raise_error("Failed to parse PIVOT's aggregation list") 2973 2974 if not self._match(TokenType.FOR): 2975 self.raise_error("Expecting FOR") 2976 2977 value = self._parse_column() 2978 2979 if not self._match(TokenType.IN): 2980 self.raise_error("Expecting IN") 2981 2982 field = self._parse_in(value, alias=True) 2983 2984 self._match_r_paren() 2985 2986 pivot = self.expression( 2987 exp.Pivot, 2988 expressions=expressions, 2989 field=field, 2990 unpivot=unpivot, 2991 include_nulls=include_nulls, 2992 ) 2993 2994 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2995 pivot.set("alias", self._parse_table_alias()) 2996 2997 if not unpivot: 2998 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2999 3000 columns: t.List[exp.Expression] = [] 3001 for fld in pivot.args["field"].expressions: 3002 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3003 for name in names: 3004 if self.PREFIXED_PIVOT_COLUMNS: 3005 name = f"{name}_{field_name}" if name else field_name 3006 else: 3007 name = f"{field_name}_{name}" if name else field_name 3008 3009 columns.append(exp.to_identifier(name)) 3010 3011 pivot.set("columns", columns) 3012 3013 return pivot 3014 3015 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3016 return [agg.alias for agg in aggregations] 3017 3018 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3019 if not skip_where_token and not self._match(TokenType.WHERE): 3020 return None 3021 3022 return self.expression( 3023 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3024 ) 3025 3026 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3027 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3028 return None 3029 3030 elements = defaultdict(list) 3031 3032 if self._match(TokenType.ALL): 3033 return self.expression(exp.Group, all=True) 3034 3035 while True: 3036 expressions = self._parse_csv(self._parse_conjunction) 3037 if expressions: 3038 elements["expressions"].extend(expressions) 3039 3040 grouping_sets = self._parse_grouping_sets() 3041 if grouping_sets: 3042 elements["grouping_sets"].extend(grouping_sets) 3043 3044 rollup = None 3045 cube = None 3046 totals = None 3047 3048 index = self._index 3049 with_ = self._match(TokenType.WITH) 3050 if self._match(TokenType.ROLLUP): 3051 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3052 elements["rollup"].extend(ensure_list(rollup)) 3053 3054 if self._match(TokenType.CUBE): 3055 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3056 elements["cube"].extend(ensure_list(cube)) 3057 3058 if self._match_text_seq("TOTALS"): 3059 totals = True 3060 elements["totals"] = True # type: ignore 3061 3062 if not (grouping_sets or rollup or cube or totals): 3063 if with_: 3064 self._retreat(index) 3065 break 3066 3067 return self.expression(exp.Group, **elements) # type: ignore 3068 3069 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3070 if not self._match(TokenType.GROUPING_SETS): 3071 return None 3072 3073 return self._parse_wrapped_csv(self._parse_grouping_set) 3074 3075 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3076 if self._match(TokenType.L_PAREN): 3077 grouping_set = 
self._parse_csv(self._parse_column) 3078 self._match_r_paren() 3079 return self.expression(exp.Tuple, expressions=grouping_set) 3080 3081 return self._parse_column() 3082 3083 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3084 if not skip_having_token and not self._match(TokenType.HAVING): 3085 return None 3086 return self.expression(exp.Having, this=self._parse_conjunction()) 3087 3088 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3089 if not self._match(TokenType.QUALIFY): 3090 return None 3091 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3092 3093 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3094 if skip_start_token: 3095 start = None 3096 elif self._match(TokenType.START_WITH): 3097 start = self._parse_conjunction() 3098 else: 3099 return None 3100 3101 self._match(TokenType.CONNECT_BY) 3102 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3103 exp.Prior, this=self._parse_bitwise() 3104 ) 3105 connect = self._parse_conjunction() 3106 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3107 3108 if not start and self._match(TokenType.START_WITH): 3109 start = self._parse_conjunction() 3110 3111 return self.expression(exp.Connect, start=start, connect=connect) 3112 3113 def _parse_order( 3114 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3115 ) -> t.Optional[exp.Expression]: 3116 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3117 return this 3118 3119 return self.expression( 3120 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3121 ) 3122 3123 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3124 if not self._match(token): 3125 return None 3126 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3127 3128 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3129 this = parse_method() if parse_method else self._parse_conjunction() 3130 3131 asc = self._match(TokenType.ASC) 3132 desc = self._match(TokenType.DESC) or (asc and False) 3133 3134 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3135 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3136 3137 nulls_first = is_nulls_first or False 3138 explicitly_null_ordered = is_nulls_first or is_nulls_last 3139 3140 if ( 3141 not explicitly_null_ordered 3142 and ( 3143 (not desc and self.NULL_ORDERING == "nulls_are_small") 3144 or (desc and self.NULL_ORDERING != "nulls_are_small") 3145 ) 3146 and self.NULL_ORDERING != "nulls_are_last" 3147 ): 3148 nulls_first = True 3149 3150 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3151 3152 def _parse_limit( 3153 self, this: t.Optional[exp.Expression] = None, top: bool = False 3154 ) -> t.Optional[exp.Expression]: 3155 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3156 comments = self._prev_comments 3157 if top: 3158 limit_paren = self._match(TokenType.L_PAREN) 3159 expression = self._parse_number() 3160 3161 if limit_paren: 3162 self._match_r_paren() 3163 else: 3164 expression = self._parse_term() 3165 3166 if self._match(TokenType.COMMA): 3167 offset = expression 3168 expression = self._parse_term() 3169 else: 3170 offset = None 3171 3172 limit_exp = self.expression( 3173 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3174 ) 3175 3176 return limit_exp 3177 3178 if self._match(TokenType.FETCH): 3179 direction = 
self._match_set((TokenType.FIRST, TokenType.NEXT)) 3180 direction = self._prev.text if direction else "FIRST" 3181 3182 count = self._parse_field(tokens=self.FETCH_TOKENS) 3183 percent = self._match(TokenType.PERCENT) 3184 3185 self._match_set((TokenType.ROW, TokenType.ROWS)) 3186 3187 only = self._match_text_seq("ONLY") 3188 with_ties = self._match_text_seq("WITH", "TIES") 3189 3190 if only and with_ties: 3191 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3192 3193 return self.expression( 3194 exp.Fetch, 3195 direction=direction, 3196 count=count, 3197 percent=percent, 3198 with_ties=with_ties, 3199 ) 3200 3201 return this 3202 3203 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3204 if not self._match(TokenType.OFFSET): 3205 return this 3206 3207 count = self._parse_term() 3208 self._match_set((TokenType.ROW, TokenType.ROWS)) 3209 return self.expression(exp.Offset, this=this, expression=count) 3210 3211 def _parse_locks(self) -> t.List[exp.Lock]: 3212 locks = [] 3213 while True: 3214 if self._match_text_seq("FOR", "UPDATE"): 3215 update = True 3216 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3217 "LOCK", "IN", "SHARE", "MODE" 3218 ): 3219 update = False 3220 else: 3221 break 3222 3223 expressions = None 3224 if self._match_text_seq("OF"): 3225 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3226 3227 wait: t.Optional[bool | exp.Expression] = None 3228 if self._match_text_seq("NOWAIT"): 3229 wait = True 3230 elif self._match_text_seq("WAIT"): 3231 wait = self._parse_primary() 3232 elif self._match_text_seq("SKIP", "LOCKED"): 3233 wait = False 3234 3235 locks.append( 3236 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3237 ) 3238 3239 return locks 3240 3241 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3242 if not self._match_set(self.SET_OPERATIONS): 3243 return this 3244 3245 token_type = self._prev.token_type 3246 3247 if token_type == TokenType.UNION: 3248 expression = exp.Union 3249 elif token_type == TokenType.EXCEPT: 3250 expression = exp.Except 3251 else: 3252 expression = exp.Intersect 3253 3254 return self.expression( 3255 expression, 3256 comments=self._prev.comments, 3257 this=this, 3258 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3259 by_name=self._match_text_seq("BY", "NAME"), 3260 expression=self._parse_set_operations(self._parse_select(nested=True)), 3261 ) 3262 3263 def _parse_expression(self) -> t.Optional[exp.Expression]: 3264 return self._parse_alias(self._parse_conjunction()) 3265 3266 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3267 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3268 3269 def _parse_equality(self) -> t.Optional[exp.Expression]: 3270 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3271 3272 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3273 return self._parse_tokens(self._parse_range, self.COMPARISON) 3274 3275 def _parse_range(self) -> t.Optional[exp.Expression]: 3276 this = self._parse_bitwise() 3277 negate = self._match(TokenType.NOT) 3278 3279 if self._match_set(self.RANGE_PARSERS): 3280 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3281 if not expression: 3282 return this 3283 3284 this = expression 3285 elif self._match(TokenType.ISNULL): 3286 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3287 3288 # Postgres 
supports ISNULL and NOTNULL for conditions. 3289 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3290 if self._match(TokenType.NOTNULL): 3291 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3292 this = self.expression(exp.Not, this=this) 3293 3294 if negate: 3295 this = self.expression(exp.Not, this=this) 3296 3297 if self._match(TokenType.IS): 3298 this = self._parse_is(this) 3299 3300 return this 3301 3302 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3303 index = self._index - 1 3304 negate = self._match(TokenType.NOT) 3305 3306 if self._match_text_seq("DISTINCT", "FROM"): 3307 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3308 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3309 3310 expression = self._parse_null() or self._parse_boolean() 3311 if not expression: 3312 self._retreat(index) 3313 return None 3314 3315 this = self.expression(exp.Is, this=this, expression=expression) 3316 return self.expression(exp.Not, this=this) if negate else this 3317 3318 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3319 unnest = self._parse_unnest(with_alias=False) 3320 if unnest: 3321 this = self.expression(exp.In, this=this, unnest=unnest) 3322 elif self._match(TokenType.L_PAREN): 3323 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3324 3325 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3326 this = self.expression(exp.In, this=this, query=expressions[0]) 3327 else: 3328 this = self.expression(exp.In, this=this, expressions=expressions) 3329 3330 self._match_r_paren(this) 3331 else: 3332 this = self.expression(exp.In, this=this, field=self._parse_field()) 3333 3334 return this 3335 3336 def _parse_between(self, this: exp.Expression) -> exp.Between: 3337 low = self._parse_bitwise() 3338 self._match(TokenType.AND) 3339 high = self._parse_bitwise() 3340 return self.expression(exp.Between, this=this, low=low, high=high) 3341 3342 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3343 if not self._match(TokenType.ESCAPE): 3344 return this 3345 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3346 3347 def _parse_interval(self) -> t.Optional[exp.Interval]: 3348 index = self._index 3349 3350 if not self._match(TokenType.INTERVAL): 3351 return None 3352 3353 if self._match(TokenType.STRING, advance=False): 3354 this = self._parse_primary() 3355 else: 3356 this = self._parse_term() 3357 3358 if not this: 3359 self._retreat(index) 3360 return None 3361 3362 unit = self._parse_function() or self._parse_var(any_token=True) 3363 3364 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3365 # each INTERVAL expression into this canonical form so it's easy to transpile 3366 if this and this.is_number: 3367 this = exp.Literal.string(this.name) 3368 elif this and this.is_string: 3369 parts = this.name.split() 3370 3371 if len(parts) == 2: 3372 if unit: 3373 # This is not actually a unit, it's something else (e.g. 
a "window side") 3374 unit = None 3375 self._retreat(self._index - 1) 3376 3377 this = exp.Literal.string(parts[0]) 3378 unit = self.expression(exp.Var, this=parts[1]) 3379 3380 return self.expression(exp.Interval, this=this, unit=unit) 3381 3382 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3383 this = self._parse_term() 3384 3385 while True: 3386 if self._match_set(self.BITWISE): 3387 this = self.expression( 3388 self.BITWISE[self._prev.token_type], 3389 this=this, 3390 expression=self._parse_term(), 3391 ) 3392 elif self._match(TokenType.DQMARK): 3393 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3394 elif self._match_pair(TokenType.LT, TokenType.LT): 3395 this = self.expression( 3396 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3397 ) 3398 elif self._match_pair(TokenType.GT, TokenType.GT): 3399 this = self.expression( 3400 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3401 ) 3402 else: 3403 break 3404 3405 return this 3406 3407 def _parse_term(self) -> t.Optional[exp.Expression]: 3408 return self._parse_tokens(self._parse_factor, self.TERM) 3409 3410 def _parse_factor(self) -> t.Optional[exp.Expression]: 3411 if self.EXPONENT: 3412 factor = self._parse_tokens(self._parse_exponent, self.FACTOR) 3413 else: 3414 factor = self._parse_tokens(self._parse_unary, self.FACTOR) 3415 if isinstance(factor, exp.Div): 3416 factor.args["typed"] = self.TYPED_DIVISION 3417 factor.args["safe"] = self.SAFE_DIVISION 3418 return factor 3419 3420 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3421 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3422 3423 def _parse_unary(self) -> t.Optional[exp.Expression]: 3424 if self._match_set(self.UNARY_PARSERS): 3425 return self.UNARY_PARSERS[self._prev.token_type](self) 3426 return self._parse_at_time_zone(self._parse_type()) 3427 3428 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3429 interval = parse_interval and self._parse_interval() 3430 if interval: 3431 return interval 3432 3433 index = self._index 3434 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3435 this = self._parse_column() 3436 3437 if data_type: 3438 if isinstance(this, exp.Literal): 3439 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3440 if parser: 3441 return parser(self, this, data_type) 3442 return self.expression(exp.Cast, this=this, to=data_type) 3443 if not data_type.expressions: 3444 self._retreat(index) 3445 return self._parse_column() 3446 return self._parse_column_ops(data_type) 3447 3448 return this and self._parse_column_ops(this) 3449 3450 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3451 this = self._parse_type() 3452 if not this: 3453 return None 3454 3455 return self.expression( 3456 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3457 ) 3458 3459 def _parse_types( 3460 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3461 ) -> t.Optional[exp.Expression]: 3462 index = self._index 3463 3464 prefix = self._match_text_seq("SYSUDTLIB", ".") 3465 3466 if not self._match_set(self.TYPE_TOKENS): 3467 identifier = allow_identifiers and self._parse_id_var( 3468 any_token=False, tokens=(TokenType.VAR,) 3469 ) 3470 3471 if identifier: 3472 tokens = self._tokenizer.tokenize(identifier.name) 3473 3474 if len(tokens) != 1: 3475 self.raise_error("Unexpected identifier", self._prev) 3476 3477 if tokens[0].token_type in self.TYPE_TOKENS: 3478 self._prev = 
tokens[0] 3479 elif self.SUPPORTS_USER_DEFINED_TYPES: 3480 type_name = identifier.name 3481 3482 while self._match(TokenType.DOT): 3483 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3484 3485 return exp.DataType.build(type_name, udt=True) 3486 else: 3487 return None 3488 else: 3489 return None 3490 3491 type_token = self._prev.token_type 3492 3493 if type_token == TokenType.PSEUDO_TYPE: 3494 return self.expression(exp.PseudoType, this=self._prev.text) 3495 3496 if type_token == TokenType.OBJECT_IDENTIFIER: 3497 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3498 3499 nested = type_token in self.NESTED_TYPE_TOKENS 3500 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3501 expressions = None 3502 maybe_func = False 3503 3504 if self._match(TokenType.L_PAREN): 3505 if is_struct: 3506 expressions = self._parse_csv(self._parse_struct_types) 3507 elif nested: 3508 expressions = self._parse_csv( 3509 lambda: self._parse_types( 3510 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3511 ) 3512 ) 3513 elif type_token in self.ENUM_TYPE_TOKENS: 3514 expressions = self._parse_csv(self._parse_equality) 3515 else: 3516 expressions = self._parse_csv(self._parse_type_size) 3517 3518 if not expressions or not self._match(TokenType.R_PAREN): 3519 self._retreat(index) 3520 return None 3521 3522 maybe_func = True 3523 3524 this: t.Optional[exp.Expression] = None 3525 values: t.Optional[t.List[exp.Expression]] = None 3526 3527 if nested and self._match(TokenType.LT): 3528 if is_struct: 3529 expressions = self._parse_csv(self._parse_struct_types) 3530 else: 3531 expressions = self._parse_csv( 3532 lambda: self._parse_types( 3533 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3534 ) 3535 ) 3536 3537 if not self._match(TokenType.GT): 3538 self.raise_error("Expecting >") 3539 3540 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3541 values = self._parse_csv(self._parse_conjunction) 3542 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3543 3544 if type_token in self.TIMESTAMPS: 3545 if self._match_text_seq("WITH", "TIME", "ZONE"): 3546 maybe_func = False 3547 tz_type = ( 3548 exp.DataType.Type.TIMETZ 3549 if type_token in self.TIMES 3550 else exp.DataType.Type.TIMESTAMPTZ 3551 ) 3552 this = exp.DataType(this=tz_type, expressions=expressions) 3553 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3554 maybe_func = False 3555 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3556 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3557 maybe_func = False 3558 elif type_token == TokenType.INTERVAL: 3559 unit = self._parse_var() 3560 3561 if self._match_text_seq("TO"): 3562 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3563 else: 3564 span = None 3565 3566 if span or not unit: 3567 this = self.expression( 3568 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3569 ) 3570 else: 3571 this = self.expression(exp.Interval, unit=unit) 3572 3573 if maybe_func and check_func: 3574 index2 = self._index 3575 peek = self._parse_string() 3576 3577 if not peek: 3578 self._retreat(index) 3579 return None 3580 3581 self._retreat(index2) 3582 3583 if not this: 3584 if self._match_text_seq("UNSIGNED"): 3585 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3586 if not unsigned_type_token: 3587 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3588 3589 type_token = unsigned_type_token or type_token 
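# Illustrative sketch (editor's annotation, not upstream source): when no earlier
# branch produced a node, the code below assembles the final exp.DataType from the
# collected pieces. The same machinery backs the public helper:
#
#     from sqlglot import exp
#     dt = exp.DataType.build("ARRAY<STRUCT<a INT, b TEXT>>")
#     # -> DataType(this=Type.ARRAY, nested=True) containing a STRUCT DataType
#     #    whose expressions are the column definitions a INT and b TEXT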

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)
        return this
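
    # Illustrative usage (not part of the original source): the DCOLON branch of
    # _parse_column_ops above normalizes Postgres-style casts. Assuming the
    # public sqlglot API:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("x::INT").sql()
    #     'CAST(x AS INT)'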

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
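
    # Illustrative usage (not part of the original source): the {fn ...} escape
    # handled above is consumed during parsing, so, assuming the public sqlglot
    # API, something like the following should hold:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}").sql()
    #     "SELECT CONCAT('a', 'b')"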

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
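
    # Illustrative usage (not part of the original source): NO_PAREN_FUNCTIONS
    # lets keywords like CURRENT_DATE parse as function expressions without
    # parentheses. Assuming the public sqlglot API, something like:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("CURRENT_DATE")).__name__
    #     'CurrentDate'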

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
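
    # Illustrative usage (not part of the original source): _parse_lambda is what
    # recognizes arrow lambdas inside function arguments, so, assuming the public
    # sqlglot API, an expression such as the following should round-trip:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("FILTER(x, e -> e > 0)").sql()
    #     'FILTER(x, e -> e > 0)'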
self._match_text_seq("ROW"): 3979 start = self._match_text_seq("START") 3980 if not start: 3981 self._match(TokenType.END) 3982 hidden = self._match_text_seq("HIDDEN") 3983 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 3984 3985 identity = self._match_text_seq("IDENTITY") 3986 3987 if self._match(TokenType.L_PAREN): 3988 if self._match(TokenType.START_WITH): 3989 this.set("start", self._parse_bitwise()) 3990 if self._match_text_seq("INCREMENT", "BY"): 3991 this.set("increment", self._parse_bitwise()) 3992 if self._match_text_seq("MINVALUE"): 3993 this.set("minvalue", self._parse_bitwise()) 3994 if self._match_text_seq("MAXVALUE"): 3995 this.set("maxvalue", self._parse_bitwise()) 3996 3997 if self._match_text_seq("CYCLE"): 3998 this.set("cycle", True) 3999 elif self._match_text_seq("NO", "CYCLE"): 4000 this.set("cycle", False) 4001 4002 if not identity: 4003 this.set("expression", self._parse_bitwise()) 4004 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4005 args = self._parse_csv(self._parse_bitwise) 4006 this.set("start", seq_get(args, 0)) 4007 this.set("increment", seq_get(args, 1)) 4008 4009 self._match_r_paren() 4010 4011 return this 4012 4013 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4014 self._match_text_seq("LENGTH") 4015 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4016 4017 def _parse_not_constraint( 4018 self, 4019 ) -> t.Optional[exp.Expression]: 4020 if self._match_text_seq("NULL"): 4021 return self.expression(exp.NotNullColumnConstraint) 4022 if self._match_text_seq("CASESPECIFIC"): 4023 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4024 if self._match_text_seq("FOR", "REPLICATION"): 4025 return self.expression(exp.NotForReplicationColumnConstraint) 4026 return None 4027 4028 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4029 if self._match(TokenType.CONSTRAINT): 4030 this = self._parse_id_var() 4031 else: 4032 this = None 4033 4034 if self._match_texts(self.CONSTRAINT_PARSERS): 4035 return self.expression( 4036 exp.ColumnConstraint, 4037 this=this, 4038 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4039 ) 4040 4041 return this 4042 4043 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4044 if not self._match(TokenType.CONSTRAINT): 4045 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4046 4047 this = self._parse_id_var() 4048 expressions = [] 4049 4050 while True: 4051 constraint = self._parse_unnamed_constraint() or self._parse_function() 4052 if not constraint: 4053 break 4054 expressions.append(constraint) 4055 4056 return self.expression(exp.Constraint, this=this, expressions=expressions) 4057 4058 def _parse_unnamed_constraint( 4059 self, constraints: t.Optional[t.Collection[str]] = None 4060 ) -> t.Optional[exp.Expression]: 4061 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4062 constraints or self.CONSTRAINT_PARSERS 4063 ): 4064 return None 4065 4066 constraint = self._prev.text.upper() 4067 if constraint not in self.CONSTRAINT_PARSERS: 4068 self.raise_error(f"No parser found for schema constraint {constraint}.") 4069 4070 return self.CONSTRAINT_PARSERS[constraint](self) 4071 4072 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4073 self._match_text_seq("KEY") 4074 return self.expression( 4075 exp.UniqueColumnConstraint, 4076 this=self._parse_schema(self._parse_id_var(any_token=False)), 4077 

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
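
    # Illustrative usage (not part of the original source): referential actions
    # collected by _parse_key_constraint_options end up in Reference options.
    # Assuming the public sqlglot API:
    #
    #     >>> import sqlglot
    #     >>> "ON DELETE CASCADE" in sqlglot.parse_one(
    #     ...     "CREATE TABLE a (b INT REFERENCES c (d) ON DELETE CASCADE)"
    #     ... ).sql()
    #     True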

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint:
        self._match(TokenType.TIMESTAMP_SNAPSHOT)

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)
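
    # Illustrative usage (not part of the original source): a bracket following a
    # value becomes an exp.Bracket subscript, while a bare bracket list becomes an
    # exp.Array. Assuming the public sqlglot API:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("x[1]").sql()
    #     'x[1]'
    #     >>> type(sqlglot.parse_one("[1, 2]")).__name__
    #     'Array'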

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
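
    # Illustrative usage (not part of the original source): the second DECODE
    # variant described in the docstring above is expanded into a CASE
    # expression. Assuming the public sqlglot API:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("DECODE(x, 1, 'one', 'other')").sql()
    #     "CASE WHEN x = 1 THEN 'one' ELSE 'other' END"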
4425 """ 4426 args = self._parse_csv(self._parse_conjunction) 4427 4428 if len(args) < 3: 4429 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4430 4431 expression, *expressions = args 4432 if not expression: 4433 return None 4434 4435 ifs = [] 4436 for search, result in zip(expressions[::2], expressions[1::2]): 4437 if not search or not result: 4438 return None 4439 4440 if isinstance(search, exp.Literal): 4441 ifs.append( 4442 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4443 ) 4444 elif isinstance(search, exp.Null): 4445 ifs.append( 4446 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4447 ) 4448 else: 4449 cond = exp.or_( 4450 exp.EQ(this=expression.copy(), expression=search), 4451 exp.and_( 4452 exp.Is(this=expression.copy(), expression=exp.Null()), 4453 exp.Is(this=search.copy(), expression=exp.Null()), 4454 copy=False, 4455 ), 4456 copy=False, 4457 ) 4458 ifs.append(exp.If(this=cond, true=result)) 4459 4460 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4461 4462 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4463 self._match_text_seq("KEY") 4464 key = self._parse_column() 4465 self._match_set((TokenType.COLON, TokenType.COMMA)) 4466 self._match_text_seq("VALUE") 4467 value = self._parse_bitwise() 4468 4469 if not key and not value: 4470 return None 4471 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4472 4473 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4474 if not this or not self._match_text_seq("FORMAT", "JSON"): 4475 return this 4476 4477 return self.expression(exp.FormatJson, this=this) 4478 4479 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4480 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4481 for value in values: 4482 if self._match_text_seq(value, "ON", on): 4483 return f"{value} ON {on}" 4484 4485 return None 4486 4487 def _parse_json_object(self) -> exp.JSONObject: 4488 star = self._parse_star() 4489 expressions = ( 4490 [star] 4491 if star 4492 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4493 ) 4494 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4495 4496 unique_keys = None 4497 if self._match_text_seq("WITH", "UNIQUE"): 4498 unique_keys = True 4499 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4500 unique_keys = False 4501 4502 self._match_text_seq("KEYS") 4503 4504 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4505 self._parse_type() 4506 ) 4507 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4508 4509 return self.expression( 4510 exp.JSONObject, 4511 expressions=expressions, 4512 null_handling=null_handling, 4513 unique_keys=unique_keys, 4514 return_type=return_type, 4515 encoding=encoding, 4516 ) 4517 4518 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4519 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4520 if not self._match_text_seq("NESTED"): 4521 this = self._parse_id_var() 4522 kind = self._parse_types(allow_identifiers=False) 4523 nested = None 4524 else: 4525 this = None 4526 kind = None 4527 nested = True 4528 4529 path = self._match_text_seq("PATH") and self._parse_string() 4530 nested_schema = nested and self._parse_json_schema() 4531 4532 return self.expression( 4533 exp.JSONColumnDef, 4534 this=this, 4535 kind=kind, 4536 path=path, 4537 nested_schema=nested_schema, 4538 ) 4539 4540 def _parse_json_schema(self) -> exp.JSONSchema: 4541 self._match_text_seq("COLUMNS") 4542 return self.expression( 4543 exp.JSONSchema, 4544 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4545 ) 4546 4547 def _parse_json_table(self) -> exp.JSONTable: 4548 this = self._parse_format_json(self._parse_bitwise()) 4549 path = self._match(TokenType.COMMA) and self._parse_string() 4550 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4551 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4552 schema = self._parse_json_schema() 4553 4554 return exp.JSONTable( 4555 this=this, 4556 schema=schema, 4557 path=path, 4558 error_handling=error_handling, 4559 empty_handling=empty_handling, 4560 ) 4561 4562 def _parse_logarithm(self) -> exp.Func: 4563 # Default argument order is base, expression 4564 args = self._parse_csv(self._parse_range) 4565 4566 if len(args) > 1: 4567 if not self.LOG_BASE_FIRST: 4568 args.reverse() 4569 return exp.Log.from_arg_list(args) 4570 4571 return self.expression( 4572 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4573 ) 4574 4575 def _parse_match_against(self) -> exp.MatchAgainst: 4576 expressions = self._parse_csv(self._parse_column) 4577 4578 self._match_text_seq(")", "AGAINST", "(") 4579 4580 this = self._parse_string() 4581 4582 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4583 modifier = "IN NATURAL LANGUAGE MODE" 4584 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4585 modifier = f"{modifier} WITH QUERY EXPANSION" 4586 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4587 modifier = "IN BOOLEAN MODE" 4588 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4589 modifier = "WITH QUERY EXPANSION" 4590 else: 4591 modifier = None 4592 4593 return 
self.expression( 4594 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4595 ) 4596 4597 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4598 def _parse_open_json(self) -> exp.OpenJSON: 4599 this = self._parse_bitwise() 4600 path = self._match(TokenType.COMMA) and self._parse_string() 4601 4602 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4603 this = self._parse_field(any_token=True) 4604 kind = self._parse_types() 4605 path = self._parse_string() 4606 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4607 4608 return self.expression( 4609 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4610 ) 4611 4612 expressions = None 4613 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4614 self._match_l_paren() 4615 expressions = self._parse_csv(_parse_open_json_column_def) 4616 4617 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4618 4619 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4620 args = self._parse_csv(self._parse_bitwise) 4621 4622 if self._match(TokenType.IN): 4623 return self.expression( 4624 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4625 ) 4626 4627 if haystack_first: 4628 haystack = seq_get(args, 0) 4629 needle = seq_get(args, 1) 4630 else: 4631 needle = seq_get(args, 0) 4632 haystack = seq_get(args, 1) 4633 4634 return self.expression( 4635 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4636 ) 4637 4638 def _parse_predict(self) -> exp.Predict: 4639 self._match_text_seq("MODEL") 4640 this = self._parse_table() 4641 4642 self._match(TokenType.COMMA) 4643 self._match_text_seq("TABLE") 4644 4645 return self.expression( 4646 exp.Predict, 4647 this=this, 4648 expression=self._parse_table(), 4649 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4650 ) 4651 4652 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4653 args = self._parse_csv(self._parse_table) 4654 return exp.JoinHint(this=func_name.upper(), expressions=args) 4655 4656 def _parse_substring(self) -> exp.Substring: 4657 # Postgres supports the form: substring(string [from int] [for int]) 4658 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4659 4660 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4661 4662 if self._match(TokenType.FROM): 4663 args.append(self._parse_bitwise()) 4664 if self._match(TokenType.FOR): 4665 args.append(self._parse_bitwise()) 4666 4667 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4668 4669 def _parse_trim(self) -> exp.Trim: 4670 # https://www.w3resource.com/sql/character-functions/trim.php 4671 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4672 4673 position = None 4674 collation = None 4675 expression = None 4676 4677 if self._match_texts(self.TRIM_TYPES): 4678 position = self._prev.text.upper() 4679 4680 this = self._parse_bitwise() 4681 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4682 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4683 expression = self._parse_bitwise() 4684 4685 if invert_order: 4686 this, expression = expression, this 4687 4688 if self._match(TokenType.COLLATE): 4689 collation = self._parse_bitwise() 4690 4691 return self.expression( 4692 exp.Trim, this=this, position=position, expression=expression, collation=collation 4693 ) 4694 4695 def 
_parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4696 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4697 4698 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4699 return self._parse_window(self._parse_id_var(), alias=True) 4700 4701 def _parse_respect_or_ignore_nulls( 4702 self, this: t.Optional[exp.Expression] 4703 ) -> t.Optional[exp.Expression]: 4704 if self._match_text_seq("IGNORE", "NULLS"): 4705 return self.expression(exp.IgnoreNulls, this=this) 4706 if self._match_text_seq("RESPECT", "NULLS"): 4707 return self.expression(exp.RespectNulls, this=this) 4708 return this 4709 4710 def _parse_window( 4711 self, this: t.Optional[exp.Expression], alias: bool = False 4712 ) -> t.Optional[exp.Expression]: 4713 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4714 self._match(TokenType.WHERE) 4715 this = self.expression( 4716 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4717 ) 4718 self._match_r_paren() 4719 4720 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4721 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4722 if self._match_text_seq("WITHIN", "GROUP"): 4723 order = self._parse_wrapped(self._parse_order) 4724 this = self.expression(exp.WithinGroup, this=this, expression=order) 4725 4726 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4727 # Some dialects choose to implement and some do not. 4728 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4729 4730 # There is some code above in _parse_lambda that handles 4731 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4732 4733 # The below changes handle 4734 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4735 4736 # Oracle allows both formats 4737 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4738 # and Snowflake chose to do the same for familiarity 4739 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4740 this = self._parse_respect_or_ignore_nulls(this) 4741 4742 # bigquery select from window x AS (partition by ...) 
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }
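
    # Illustrative usage (not part of the original source): a full window
    # specification exercises _parse_window and _parse_window_spec. Assuming the
    # public sqlglot API, it should round-trip unchanged:
    #
    #     >>> import sqlglot
    #     >>> sql = (
    #     ...     "SELECT SUM(x) OVER "
    #     ...     "(PARTITION BY y ORDER BY z ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
    #     ... )
    #     >>> sqlglot.parse_one(sql).sql() == sql
    #     True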

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this
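
    # Illustrative note (not part of the original source): _parse_tokens is the
    # generic left-associative binary-operator loop, so chained operators nest to
    # the left. Assuming the public sqlglot API:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("1 - 2 - 3").this.sql()
    #     '1 - 2'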

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens
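
    # Illustrative usage (not part of the original source): assuming the public
    # sqlglot API, a MERGE statement exercises _parse_merge and
    # _parse_when_matched and should round-trip:
    #
    #     >>> import sqlglot
    #     >>> sql = "MERGE INTO t USING s ON t.id = s.id WHEN MATCHED THEN UPDATE SET t.v = s.v"
    #     >>> sqlglot.parse_one(sql).sql() == sql
    #     True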
self.expression(exp.EQ, this=left, expression=right) 5276 5277 return self.expression(exp.SetItem, this=this, kind=kind) 5278 5279 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5280 self._match_text_seq("TRANSACTION") 5281 characteristics = self._parse_csv( 5282 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5283 ) 5284 return self.expression( 5285 exp.SetItem, 5286 expressions=characteristics, 5287 kind="TRANSACTION", 5288 **{"global": global_}, # type: ignore 5289 ) 5290 5291 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5292 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5293 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5294 5295 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5296 index = self._index 5297 set_ = self.expression( 5298 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5299 ) 5300 5301 if self._curr: 5302 self._retreat(index) 5303 return self._parse_as_command(self._prev) 5304 5305 return set_ 5306 5307 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5308 for option in options: 5309 if self._match_text_seq(*option.split(" ")): 5310 return exp.var(option) 5311 return None 5312 5313 def _parse_as_command(self, start: Token) -> exp.Command: 5314 while self._curr: 5315 self._advance() 5316 text = self._find_sql(start, self._prev) 5317 size = len(start.text) 5318 return exp.Command(this=text[:size], expression=text[size:]) 5319 5320 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5321 settings = [] 5322 5323 self._match_l_paren() 5324 kind = self._parse_id_var() 5325 5326 if self._match(TokenType.L_PAREN): 5327 while True: 5328 key = self._parse_id_var() 5329 value = self._parse_primary() 5330 5331 if not key and value is None: 5332 break 5333 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5334 self._match(TokenType.R_PAREN) 5335 5336 self._match_r_paren() 5337 5338 return self.expression( 5339 exp.DictProperty, 5340 this=this, 5341 kind=kind.this if kind else None, 5342 settings=settings, 5343 ) 5344 5345 def _parse_dict_range(self, this: str) -> exp.DictRange: 5346 self._match_l_paren() 5347 has_min = self._match_text_seq("MIN") 5348 if has_min: 5349 min = self._parse_var() or self._parse_primary() 5350 self._match_text_seq("MAX") 5351 max = self._parse_var() or self._parse_primary() 5352 else: 5353 max = self._parse_var() or self._parse_primary() 5354 min = exp.Literal.number(0) 5355 self._match_r_paren() 5356 return self.expression(exp.DictRange, this=this, min=min, max=max) 5357 5358 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5359 index = self._index 5360 expression = self._parse_column() 5361 if not self._match(TokenType.IN): 5362 self._retreat(index - 1) 5363 return None 5364 iterator = self._parse_column() 5365 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5366 return self.expression( 5367 exp.Comprehension, 5368 this=this, 5369 expression=expression, 5370 iterator=iterator, 5371 condition=condition, 5372 ) 5373 5374 def _find_parser( 5375 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5376 ) -> t.Optional[t.Callable]: 5377 if not self._curr: 5378 return None 5379 5380 index = self._index 5381 this = [] 5382 while True: 5383 # The current token might be multiple words 5384 curr = self._curr.text.upper() 5385 key = curr.split(" ") 
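            # A single token's text can itself contain spaces (a dialect may
            # register a SHOW or SET parser under a multi-word key), so the
            # token is split into words and walked through the trie one word
            # at a time.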
5386 this.append(curr) 5387 5388 self._advance() 5389 result, trie = in_trie(trie, key) 5390 if result == TrieResult.FAILED: 5391 break 5392 5393 if result == TrieResult.EXISTS: 5394 subparser = parsers[" ".join(this)] 5395 return subparser 5396 5397 self._retreat(index) 5398 return None 5399 5400 def _match(self, token_type, advance=True, expression=None): 5401 if not self._curr: 5402 return None 5403 5404 if self._curr.token_type == token_type: 5405 if advance: 5406 self._advance() 5407 self._add_comments(expression) 5408 return True 5409 5410 return None 5411 5412 def _match_set(self, types, advance=True): 5413 if not self._curr: 5414 return None 5415 5416 if self._curr.token_type in types: 5417 if advance: 5418 self._advance() 5419 return True 5420 5421 return None 5422 5423 def _match_pair(self, token_type_a, token_type_b, advance=True): 5424 if not self._curr or not self._next: 5425 return None 5426 5427 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5428 if advance: 5429 self._advance(2) 5430 return True 5431 5432 return None 5433 5434 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5435 if not self._match(TokenType.L_PAREN, expression=expression): 5436 self.raise_error("Expecting (") 5437 5438 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5439 if not self._match(TokenType.R_PAREN, expression=expression): 5440 self.raise_error("Expecting )") 5441 5442 def _match_texts(self, texts, advance=True): 5443 if self._curr and self._curr.text.upper() in texts: 5444 if advance: 5445 self._advance() 5446 return True 5447 return False 5448 5449 def _match_text_seq(self, *texts, advance=True): 5450 index = self._index 5451 for text in texts: 5452 if self._curr and self._curr.text.upper() == text: 5453 self._advance() 5454 else: 5455 self._retreat(index) 5456 return False 5457 5458 if not advance: 5459 self._retreat(index) 5460 5461 return True 5462 5463 @t.overload 5464 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5465 ... 5466 5467 @t.overload 5468 def _replace_columns_with_dots( 5469 self, this: t.Optional[exp.Expression] 5470 ) -> t.Optional[exp.Expression]: 5471 ... 5472 5473 def _replace_columns_with_dots(self, this): 5474 if isinstance(this, exp.Dot): 5475 exp.replace_children(this, self._replace_columns_with_dots) 5476 elif isinstance(this, exp.Column): 5477 exp.replace_children(this, self._replace_columns_with_dots) 5478 table = this.args.get("table") 5479 this = ( 5480 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5481 ) 5482 5483 return this 5484 5485 def _replace_lambda( 5486 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5487 ) -> t.Optional[exp.Expression]: 5488 if not node: 5489 return node 5490 5491 for column in node.find_all(exp.Column): 5492 if column.parts[0].name in lambda_variables: 5493 dot_or_id = column.to_dot() if column.table else column.this 5494 parent = column.parent 5495 5496 while isinstance(parent, exp.Dot): 5497 if not isinstance(parent.parent, exp.Dot): 5498 parent.replace(dot_or_id) 5499 break 5500 parent = parent.parent 5501 else: 5502 if column is node: 5503 node = dot_or_id 5504 else: 5505 column.replace(dot_or_id) 5506 return node 5507 5508 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5509 return [ 5510 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5511 for value in values 5512 if value 5513 ]
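To see the entry points above working together, here is a minimal sketch (not part of the module source) that tokenizes a statement and hands the raw tokens to Parser.parse:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t WHERE b > 1"

    # Parser.parse returns one syntax tree per semicolon-separated statement;
    # passing the original SQL string improves error messages.
    tokens = Tokenizer().tokenize(sql)
    expressions = Parser().parse(tokens, sql)

    print(expressions[0].sql())  # SELECT a FROM t WHERE b > 1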
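The module-level helper parse_var_map, registered in Parser.FUNCTIONS under the VAR_MAP key, can likewise be exercised directly. A short sketch of the two shapes it returns:

    from sqlglot import exp
    from sqlglot.parser import parse_var_map

    # Alternating key/value arguments are zipped into two aligned arrays.
    var_map = parse_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
    assert isinstance(var_map, exp.VarMap)

    # A single star argument short-circuits to a StarMap instead.
    star_map = parse_var_map([exp.Star()])
    assert isinstance(star_map, exp.StarMap)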
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMP_S, 165 TokenType.TIMESTAMP_MS, 166 TokenType.TIMESTAMP_NS, 167 TokenType.TIMESTAMPTZ, 168 TokenType.TIMESTAMPLTZ, 169 TokenType.DATETIME, 170 TokenType.DATETIME64, 171 TokenType.DATE, 172 TokenType.INT4RANGE, 173 TokenType.INT4MULTIRANGE, 174 TokenType.INT8RANGE, 175 TokenType.INT8MULTIRANGE, 176 TokenType.NUMRANGE, 177 TokenType.NUMMULTIRANGE, 178 TokenType.TSRANGE, 179 TokenType.TSMULTIRANGE, 180 TokenType.TSTZRANGE, 181 TokenType.TSTZMULTIRANGE, 182 TokenType.DATERANGE, 183 TokenType.DATEMULTIRANGE, 184 TokenType.DECIMAL, 185 TokenType.UDECIMAL, 186 TokenType.BIGDECIMAL, 187 TokenType.UUID, 188 TokenType.GEOGRAPHY, 189 TokenType.GEOMETRY, 190 TokenType.HLLSKETCH, 191 TokenType.HSTORE, 192 
TokenType.PSEUDO_TYPE, 193 TokenType.SUPER, 194 TokenType.SERIAL, 195 TokenType.SMALLSERIAL, 196 TokenType.BIGSERIAL, 197 TokenType.XML, 198 TokenType.YEAR, 199 TokenType.UNIQUEIDENTIFIER, 200 TokenType.USERDEFINED, 201 TokenType.MONEY, 202 TokenType.SMALLMONEY, 203 TokenType.ROWVERSION, 204 TokenType.IMAGE, 205 TokenType.VARIANT, 206 TokenType.OBJECT, 207 TokenType.OBJECT_IDENTIFIER, 208 TokenType.INET, 209 TokenType.IPADDRESS, 210 TokenType.IPPREFIX, 211 TokenType.UNKNOWN, 212 TokenType.NULL, 213 *ENUM_TYPE_TOKENS, 214 *NESTED_TYPE_TOKENS, 215 } 216 217 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 218 TokenType.BIGINT: TokenType.UBIGINT, 219 TokenType.INT: TokenType.UINT, 220 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 221 TokenType.SMALLINT: TokenType.USMALLINT, 222 TokenType.TINYINT: TokenType.UTINYINT, 223 TokenType.DECIMAL: TokenType.UDECIMAL, 224 } 225 226 SUBQUERY_PREDICATES = { 227 TokenType.ANY: exp.Any, 228 TokenType.ALL: exp.All, 229 TokenType.EXISTS: exp.Exists, 230 TokenType.SOME: exp.Any, 231 } 232 233 RESERVED_KEYWORDS = { 234 *Tokenizer.SINGLE_TOKENS.values(), 235 TokenType.SELECT, 236 } 237 238 DB_CREATABLES = { 239 TokenType.DATABASE, 240 TokenType.SCHEMA, 241 TokenType.TABLE, 242 TokenType.VIEW, 243 TokenType.MODEL, 244 TokenType.DICTIONARY, 245 } 246 247 CREATABLES = { 248 TokenType.COLUMN, 249 TokenType.CONSTRAINT, 250 TokenType.FUNCTION, 251 TokenType.INDEX, 252 TokenType.PROCEDURE, 253 *DB_CREATABLES, 254 } 255 256 # Tokens that can represent identifiers 257 ID_VAR_TOKENS = { 258 TokenType.VAR, 259 TokenType.ANTI, 260 TokenType.APPLY, 261 TokenType.ASC, 262 TokenType.AUTO_INCREMENT, 263 TokenType.BEGIN, 264 TokenType.CACHE, 265 TokenType.CASE, 266 TokenType.COLLATE, 267 TokenType.COMMAND, 268 TokenType.COMMENT, 269 TokenType.COMMIT, 270 TokenType.CONSTRAINT, 271 TokenType.DEFAULT, 272 TokenType.DELETE, 273 TokenType.DESC, 274 TokenType.DESCRIBE, 275 TokenType.DICTIONARY, 276 TokenType.DIV, 277 TokenType.END, 278 TokenType.EXECUTE, 279 TokenType.ESCAPE, 280 TokenType.FALSE, 281 TokenType.FIRST, 282 TokenType.FILTER, 283 TokenType.FORMAT, 284 TokenType.FULL, 285 TokenType.IS, 286 TokenType.ISNULL, 287 TokenType.INTERVAL, 288 TokenType.KEEP, 289 TokenType.KILL, 290 TokenType.LEFT, 291 TokenType.LOAD, 292 TokenType.MERGE, 293 TokenType.NATURAL, 294 TokenType.NEXT, 295 TokenType.OFFSET, 296 TokenType.ORDINALITY, 297 TokenType.OVERLAPS, 298 TokenType.OVERWRITE, 299 TokenType.PARTITION, 300 TokenType.PERCENT, 301 TokenType.PIVOT, 302 TokenType.PRAGMA, 303 TokenType.RANGE, 304 TokenType.RECURSIVE, 305 TokenType.REFERENCES, 306 TokenType.RIGHT, 307 TokenType.ROW, 308 TokenType.ROWS, 309 TokenType.SEMI, 310 TokenType.SET, 311 TokenType.SETTINGS, 312 TokenType.SHOW, 313 TokenType.TEMPORARY, 314 TokenType.TOP, 315 TokenType.TRUE, 316 TokenType.UNIQUE, 317 TokenType.UNPIVOT, 318 TokenType.UPDATE, 319 TokenType.USE, 320 TokenType.VOLATILE, 321 TokenType.WINDOW, 322 *CREATABLES, 323 *SUBQUERY_PREDICATES, 324 *TYPE_TOKENS, 325 *NO_PAREN_FUNCTIONS, 326 } 327 328 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 329 330 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 331 TokenType.ANTI, 332 TokenType.APPLY, 333 TokenType.ASOF, 334 TokenType.FULL, 335 TokenType.LEFT, 336 TokenType.LOCK, 337 TokenType.NATURAL, 338 TokenType.OFFSET, 339 TokenType.RIGHT, 340 TokenType.SEMI, 341 TokenType.WINDOW, 342 } 343 344 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 345 346 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 347 348 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 349 350 FUNC_TOKENS 
= { 351 TokenType.COLLATE, 352 TokenType.COMMAND, 353 TokenType.CURRENT_DATE, 354 TokenType.CURRENT_DATETIME, 355 TokenType.CURRENT_TIMESTAMP, 356 TokenType.CURRENT_TIME, 357 TokenType.CURRENT_USER, 358 TokenType.FILTER, 359 TokenType.FIRST, 360 TokenType.FORMAT, 361 TokenType.GLOB, 362 TokenType.IDENTIFIER, 363 TokenType.INDEX, 364 TokenType.ISNULL, 365 TokenType.ILIKE, 366 TokenType.INSERT, 367 TokenType.LIKE, 368 TokenType.MERGE, 369 TokenType.OFFSET, 370 TokenType.PRIMARY_KEY, 371 TokenType.RANGE, 372 TokenType.REPLACE, 373 TokenType.RLIKE, 374 TokenType.ROW, 375 TokenType.UNNEST, 376 TokenType.VAR, 377 TokenType.LEFT, 378 TokenType.RIGHT, 379 TokenType.DATE, 380 TokenType.DATETIME, 381 TokenType.TABLE, 382 TokenType.TIMESTAMP, 383 TokenType.TIMESTAMPTZ, 384 TokenType.WINDOW, 385 TokenType.XOR, 386 *TYPE_TOKENS, 387 *SUBQUERY_PREDICATES, 388 } 389 390 CONJUNCTION = { 391 TokenType.AND: exp.And, 392 TokenType.OR: exp.Or, 393 } 394 395 EQUALITY = { 396 TokenType.COLON_EQ: exp.PropertyEQ, 397 TokenType.EQ: exp.EQ, 398 TokenType.NEQ: exp.NEQ, 399 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 400 } 401 402 COMPARISON = { 403 TokenType.GT: exp.GT, 404 TokenType.GTE: exp.GTE, 405 TokenType.LT: exp.LT, 406 TokenType.LTE: exp.LTE, 407 } 408 409 BITWISE = { 410 TokenType.AMP: exp.BitwiseAnd, 411 TokenType.CARET: exp.BitwiseXor, 412 TokenType.PIPE: exp.BitwiseOr, 413 TokenType.DPIPE: exp.DPipe, 414 } 415 416 TERM = { 417 TokenType.DASH: exp.Sub, 418 TokenType.PLUS: exp.Add, 419 TokenType.MOD: exp.Mod, 420 TokenType.COLLATE: exp.Collate, 421 } 422 423 FACTOR = { 424 TokenType.DIV: exp.IntDiv, 425 TokenType.LR_ARROW: exp.Distance, 426 TokenType.SLASH: exp.Div, 427 TokenType.STAR: exp.Mul, 428 } 429 430 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 431 432 TIMES = { 433 TokenType.TIME, 434 TokenType.TIMETZ, 435 } 436 437 TIMESTAMPS = { 438 TokenType.TIMESTAMP, 439 TokenType.TIMESTAMPTZ, 440 TokenType.TIMESTAMPLTZ, 441 *TIMES, 442 } 443 444 SET_OPERATIONS = { 445 TokenType.UNION, 446 TokenType.INTERSECT, 447 TokenType.EXCEPT, 448 } 449 450 JOIN_METHODS = { 451 TokenType.NATURAL, 452 TokenType.ASOF, 453 } 454 455 JOIN_SIDES = { 456 TokenType.LEFT, 457 TokenType.RIGHT, 458 TokenType.FULL, 459 } 460 461 JOIN_KINDS = { 462 TokenType.INNER, 463 TokenType.OUTER, 464 TokenType.CROSS, 465 TokenType.SEMI, 466 TokenType.ANTI, 467 } 468 469 JOIN_HINTS: t.Set[str] = set() 470 471 LAMBDAS = { 472 TokenType.ARROW: lambda self, expressions: self.expression( 473 exp.Lambda, 474 this=self._replace_lambda( 475 self._parse_conjunction(), 476 {node.name for node in expressions}, 477 ), 478 expressions=expressions, 479 ), 480 TokenType.FARROW: lambda self, expressions: self.expression( 481 exp.Kwarg, 482 this=exp.var(expressions[0].name), 483 expression=self._parse_conjunction(), 484 ), 485 } 486 487 COLUMN_OPERATORS = { 488 TokenType.DOT: None, 489 TokenType.DCOLON: lambda self, this, to: self.expression( 490 exp.Cast if self.STRICT_CAST else exp.TryCast, 491 this=this, 492 to=to, 493 ), 494 TokenType.ARROW: lambda self, this, path: self.expression( 495 exp.JSONExtract, 496 this=this, 497 expression=path, 498 ), 499 TokenType.DARROW: lambda self, this, path: self.expression( 500 exp.JSONExtractScalar, 501 this=this, 502 expression=path, 503 ), 504 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 505 exp.JSONBExtract, 506 this=this, 507 expression=path, 508 ), 509 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 510 exp.JSONBExtractScalar, 511 this=this, 512 expression=path, 513 ), 514 
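        # In column-operator position, a "?" placeholder token is treated as
        # the PostgreSQL-style jsonb "?" operator, e.g. col ? 'key'.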
TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 515 exp.JSONBContains, 516 this=this, 517 expression=key, 518 ), 519 } 520 521 EXPRESSION_PARSERS = { 522 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 523 exp.Column: lambda self: self._parse_column(), 524 exp.Condition: lambda self: self._parse_conjunction(), 525 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 526 exp.Expression: lambda self: self._parse_statement(), 527 exp.From: lambda self: self._parse_from(), 528 exp.Group: lambda self: self._parse_group(), 529 exp.Having: lambda self: self._parse_having(), 530 exp.Identifier: lambda self: self._parse_id_var(), 531 exp.Join: lambda self: self._parse_join(), 532 exp.Lambda: lambda self: self._parse_lambda(), 533 exp.Lateral: lambda self: self._parse_lateral(), 534 exp.Limit: lambda self: self._parse_limit(), 535 exp.Offset: lambda self: self._parse_offset(), 536 exp.Order: lambda self: self._parse_order(), 537 exp.Ordered: lambda self: self._parse_ordered(), 538 exp.Properties: lambda self: self._parse_properties(), 539 exp.Qualify: lambda self: self._parse_qualify(), 540 exp.Returning: lambda self: self._parse_returning(), 541 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 542 exp.Table: lambda self: self._parse_table_parts(), 543 exp.TableAlias: lambda self: self._parse_table_alias(), 544 exp.Where: lambda self: self._parse_where(), 545 exp.Window: lambda self: self._parse_named_window(), 546 exp.With: lambda self: self._parse_with(), 547 "JOIN_TYPE": lambda self: self._parse_join_parts(), 548 } 549 550 STATEMENT_PARSERS = { 551 TokenType.ALTER: lambda self: self._parse_alter(), 552 TokenType.BEGIN: lambda self: self._parse_transaction(), 553 TokenType.CACHE: lambda self: self._parse_cache(), 554 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 555 TokenType.COMMENT: lambda self: self._parse_comment(), 556 TokenType.CREATE: lambda self: self._parse_create(), 557 TokenType.DELETE: lambda self: self._parse_delete(), 558 TokenType.DESC: lambda self: self._parse_describe(), 559 TokenType.DESCRIBE: lambda self: self._parse_describe(), 560 TokenType.DROP: lambda self: self._parse_drop(), 561 TokenType.INSERT: lambda self: self._parse_insert(), 562 TokenType.KILL: lambda self: self._parse_kill(), 563 TokenType.LOAD: lambda self: self._parse_load(), 564 TokenType.MERGE: lambda self: self._parse_merge(), 565 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 566 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 567 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 568 TokenType.SET: lambda self: self._parse_set(), 569 TokenType.UNCACHE: lambda self: self._parse_uncache(), 570 TokenType.UPDATE: lambda self: self._parse_update(), 571 TokenType.USE: lambda self: self.expression( 572 exp.Use, 573 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 574 and exp.var(self._prev.text), 575 this=self._parse_table(schema=False), 576 ), 577 } 578 579 UNARY_PARSERS = { 580 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 581 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 582 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 583 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 584 } 585 586 PRIMARY_PARSERS = { 587 TokenType.STRING: lambda self, token: self.expression( 588 
exp.Literal, this=token.text, is_string=True 589 ), 590 TokenType.NUMBER: lambda self, token: self.expression( 591 exp.Literal, this=token.text, is_string=False 592 ), 593 TokenType.STAR: lambda self, _: self.expression( 594 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 595 ), 596 TokenType.NULL: lambda self, _: self.expression(exp.Null), 597 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 598 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 599 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 600 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 601 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 602 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 603 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 604 exp.National, this=token.text 605 ), 606 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 607 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 608 exp.RawString, this=token.text 609 ), 610 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 611 } 612 613 PLACEHOLDER_PARSERS = { 614 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 615 TokenType.PARAMETER: lambda self: self._parse_parameter(), 616 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 617 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 618 else None, 619 } 620 621 RANGE_PARSERS = { 622 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 623 TokenType.GLOB: binary_range_parser(exp.Glob), 624 TokenType.ILIKE: binary_range_parser(exp.ILike), 625 TokenType.IN: lambda self, this: self._parse_in(this), 626 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 627 TokenType.IS: lambda self, this: self._parse_is(this), 628 TokenType.LIKE: binary_range_parser(exp.Like), 629 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 630 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 631 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 632 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 633 } 634 635 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 636 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 637 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 638 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 639 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 640 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 641 "CHECKSUM": lambda self: self._parse_checksum(), 642 "CLUSTER BY": lambda self: self._parse_cluster(), 643 "CLUSTERED": lambda self: self._parse_clustered_by(), 644 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 645 exp.CollateProperty, **kwargs 646 ), 647 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 648 "COPY": lambda self: self._parse_copy_property(), 649 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 650 "DEFINER": lambda self: self._parse_definer(), 651 "DETERMINISTIC": lambda self: self.expression( 652 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 653 ), 654 "DISTKEY": lambda self: self._parse_distkey(), 655 "DISTSTYLE": lambda self: 
self._parse_property_assignment(exp.DistStyleProperty), 656 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 657 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 658 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 659 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 660 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 661 "FREESPACE": lambda self: self._parse_freespace(), 662 "HEAP": lambda self: self.expression(exp.HeapProperty), 663 "IMMUTABLE": lambda self: self.expression( 664 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 665 ), 666 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 667 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 668 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 669 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 670 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 671 "LIKE": lambda self: self._parse_create_like(), 672 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 673 "LOCK": lambda self: self._parse_locking(), 674 "LOCKING": lambda self: self._parse_locking(), 675 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 676 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 677 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 678 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 679 "NO": lambda self: self._parse_no_property(), 680 "ON": lambda self: self._parse_on_property(), 681 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 682 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 683 "PARTITION": lambda self: self._parse_partitioned_of(), 684 "PARTITION BY": lambda self: self._parse_partitioned_by(), 685 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 686 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 687 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 688 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 689 "REMOTE": lambda self: self._parse_remote_with_connection(), 690 "RETURNS": lambda self: self._parse_returns(), 691 "ROW": lambda self: self._parse_row(), 692 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 693 "SAMPLE": lambda self: self.expression( 694 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 695 ), 696 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 697 "SETTINGS": lambda self: self.expression( 698 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 699 ), 700 "SORTKEY": lambda self: self._parse_sortkey(), 701 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 702 "STABLE": lambda self: self.expression( 703 exp.StabilityProperty, this=exp.Literal.string("STABLE") 704 ), 705 "STORED": lambda self: self._parse_stored(), 706 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 707 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 708 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 709 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 710 "TO": lambda self: self._parse_to_table(), 711 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 712 "TRANSFORM": lambda self: 
self.expression( 713 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 714 ), 715 "TTL": lambda self: self._parse_ttl(), 716 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 717 "VOLATILE": lambda self: self._parse_volatile_property(), 718 "WITH": lambda self: self._parse_with_property(), 719 } 720 721 CONSTRAINT_PARSERS = { 722 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 723 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 724 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 725 "CHARACTER SET": lambda self: self.expression( 726 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 727 ), 728 "CHECK": lambda self: self.expression( 729 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 730 ), 731 "COLLATE": lambda self: self.expression( 732 exp.CollateColumnConstraint, this=self._parse_var() 733 ), 734 "COMMENT": lambda self: self.expression( 735 exp.CommentColumnConstraint, this=self._parse_string() 736 ), 737 "COMPRESS": lambda self: self._parse_compress(), 738 "CLUSTERED": lambda self: self.expression( 739 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 740 ), 741 "NONCLUSTERED": lambda self: self.expression( 742 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 743 ), 744 "DEFAULT": lambda self: self.expression( 745 exp.DefaultColumnConstraint, this=self._parse_bitwise() 746 ), 747 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 748 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 749 "FORMAT": lambda self: self.expression( 750 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 751 ), 752 "GENERATED": lambda self: self._parse_generated_as_identity(), 753 "IDENTITY": lambda self: self._parse_auto_increment(), 754 "INLINE": lambda self: self._parse_inline(), 755 "LIKE": lambda self: self._parse_create_like(), 756 "NOT": lambda self: self._parse_not_constraint(), 757 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 758 "ON": lambda self: ( 759 self._match(TokenType.UPDATE) 760 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 761 ) 762 or self.expression(exp.OnProperty, this=self._parse_id_var()), 763 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 764 "PERIOD": lambda self: self._parse_period_for_system_time(), 765 "PRIMARY KEY": lambda self: self._parse_primary_key(), 766 "REFERENCES": lambda self: self._parse_references(match=False), 767 "TITLE": lambda self: self.expression( 768 exp.TitleColumnConstraint, this=self._parse_var_or_string() 769 ), 770 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 771 "UNIQUE": lambda self: self._parse_unique(), 772 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 773 "WITH": lambda self: self.expression( 774 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 775 ), 776 } 777 778 ALTER_PARSERS = { 779 "ADD": lambda self: self._parse_alter_table_add(), 780 "ALTER": lambda self: self._parse_alter_table_alter(), 781 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 782 "DROP": lambda self: self._parse_alter_table_drop(), 783 "RENAME": lambda self: self._parse_alter_table_rename(), 784 } 785 786 SCHEMA_UNNAMED_CONSTRAINTS = 
{"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 787 788 NO_PAREN_FUNCTION_PARSERS = { 789 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 790 "CASE": lambda self: self._parse_case(), 791 "IF": lambda self: self._parse_if(), 792 "NEXT": lambda self: self._parse_next_value_for(), 793 } 794 795 INVALID_FUNC_NAME_TOKENS = { 796 TokenType.IDENTIFIER, 797 TokenType.STRING, 798 } 799 800 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 801 802 FUNCTION_PARSERS = { 803 "ANY_VALUE": lambda self: self._parse_any_value(), 804 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 805 "CONCAT": lambda self: self._parse_concat(), 806 "CONCAT_WS": lambda self: self._parse_concat_ws(), 807 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 808 "DECODE": lambda self: self._parse_decode(), 809 "EXTRACT": lambda self: self._parse_extract(), 810 "JSON_OBJECT": lambda self: self._parse_json_object(), 811 "JSON_TABLE": lambda self: self._parse_json_table(), 812 "LOG": lambda self: self._parse_logarithm(), 813 "MATCH": lambda self: self._parse_match_against(), 814 "OPENJSON": lambda self: self._parse_open_json(), 815 "POSITION": lambda self: self._parse_position(), 816 "PREDICT": lambda self: self._parse_predict(), 817 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 818 "STRING_AGG": lambda self: self._parse_string_agg(), 819 "SUBSTRING": lambda self: self._parse_substring(), 820 "TRIM": lambda self: self._parse_trim(), 821 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 822 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 823 } 824 825 QUERY_MODIFIER_PARSERS = { 826 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 827 TokenType.WHERE: lambda self: ("where", self._parse_where()), 828 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 829 TokenType.HAVING: lambda self: ("having", self._parse_having()), 830 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 831 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 832 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 833 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 834 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 835 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 836 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 837 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 838 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 839 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 840 TokenType.CLUSTER_BY: lambda self: ( 841 "cluster", 842 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 843 ), 844 TokenType.DISTRIBUTE_BY: lambda self: ( 845 "distribute", 846 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 847 ), 848 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 849 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 850 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 851 } 852 853 SET_PARSERS = { 854 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 855 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 856 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 857 "TRANSACTION": lambda self: self._parse_set_transaction(), 858 } 859 860 SHOW_PARSERS: t.Dict[str, 
t.Callable] = {} 861 862 TYPE_LITERAL_PARSERS = { 863 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 864 } 865 866 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 867 868 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 869 870 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 871 872 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 873 TRANSACTION_CHARACTERISTICS = { 874 "ISOLATION LEVEL REPEATABLE READ", 875 "ISOLATION LEVEL READ COMMITTED", 876 "ISOLATION LEVEL READ UNCOMMITTED", 877 "ISOLATION LEVEL SERIALIZABLE", 878 "READ WRITE", 879 "READ ONLY", 880 } 881 882 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 883 884 CLONE_KEYWORDS = {"CLONE", "COPY"} 885 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 886 887 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 888 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 889 890 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 891 892 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 893 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 894 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 895 896 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 897 898 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 899 900 DISTINCT_TOKENS = {TokenType.DISTINCT} 901 902 NULL_TOKENS = {TokenType.NULL} 903 904 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 905 906 STRICT_CAST = True 907 908 # A NULL arg in CONCAT yields NULL by default 909 CONCAT_NULL_OUTPUTS_STRING = False 910 911 PREFIXED_PIVOT_COLUMNS = False 912 IDENTIFY_PIVOT_STRINGS = False 913 914 LOG_BASE_FIRST = True 915 LOG_DEFAULTS_TO_LN = False 916 917 # Whether or not ADD is present for each column added by ALTER TABLE 918 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 919 920 # Whether or not the table sample clause expects CSV syntax 921 TABLESAMPLE_CSV = False 922 923 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 924 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 925 926 # Whether the TRIM function expects the characters to trim as its first argument 927 TRIM_PATTERN_FIRST = False 928 929 # Whether the behavior of a / b depends on the types of a and b. 930 # False means a / b is always float division. 931 # True means a / b is integer division if both a and b are integers. 932 TYPED_DIVISION = False 933 934 # False means 1 / 0 throws an error. 935 # True means 1 / 0 returns null. 
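    # For example, MySQL evaluates SELECT 1 / 0 to NULL, so a dialect with
    # that behavior would set SAFE_DIVISION = True; engines that raise a
    # division-by-zero error keep the default.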
936 SAFE_DIVISION = False 937 938 __slots__ = ( 939 "error_level", 940 "error_message_context", 941 "max_errors", 942 "sql", 943 "errors", 944 "_tokens", 945 "_index", 946 "_curr", 947 "_next", 948 "_prev", 949 "_prev_comments", 950 "_tokenizer", 951 ) 952 953 # Autofilled 954 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 955 INDEX_OFFSET: int = 0 956 UNNEST_COLUMN_ONLY: bool = False 957 ALIAS_POST_TABLESAMPLE: bool = False 958 STRICT_STRING_CONCAT = False 959 SUPPORTS_USER_DEFINED_TYPES = True 960 NORMALIZE_FUNCTIONS = "upper" 961 NULL_ORDERING: str = "nulls_are_small" 962 SHOW_TRIE: t.Dict = {} 963 SET_TRIE: t.Dict = {} 964 FORMAT_MAPPING: t.Dict[str, str] = {} 965 FORMAT_TRIE: t.Dict = {} 966 TIME_MAPPING: t.Dict[str, str] = {} 967 TIME_TRIE: t.Dict = {} 968 969 def __init__( 970 self, 971 error_level: t.Optional[ErrorLevel] = None, 972 error_message_context: int = 100, 973 max_errors: int = 3, 974 ): 975 self.error_level = error_level or ErrorLevel.IMMEDIATE 976 self.error_message_context = error_message_context 977 self.max_errors = max_errors 978 self._tokenizer = self.TOKENIZER_CLASS() 979 self.reset() 980 981 def reset(self): 982 self.sql = "" 983 self.errors = [] 984 self._tokens = [] 985 self._index = 0 986 self._curr = None 987 self._next = None 988 self._prev = None 989 self._prev_comments = None 990 991 def parse( 992 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 993 ) -> t.List[t.Optional[exp.Expression]]: 994 """ 995 Parses a list of tokens and returns a list of syntax trees, one tree 996 per parsed SQL statement. 997 998 Args: 999 raw_tokens: The list of tokens. 1000 sql: The original SQL string, used to produce helpful debug messages. 1001 1002 Returns: 1003 The list of the produced syntax trees. 1004 """ 1005 return self._parse( 1006 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1007 ) 1008 1009 def parse_into( 1010 self, 1011 expression_types: exp.IntoType, 1012 raw_tokens: t.List[Token], 1013 sql: t.Optional[str] = None, 1014 ) -> t.List[t.Optional[exp.Expression]]: 1015 """ 1016 Parses a list of tokens into a given Expression type. If a collection of Expression 1017 types is given instead, this method will try to parse the token list into each one 1018 of them, stopping at the first for which the parsing succeeds. 1019 1020 Args: 1021 expression_types: The expression type(s) to try and parse the token list into. 1022 raw_tokens: The list of tokens. 1023 sql: The original SQL string, used to produce helpful debug messages. 1024 1025 Returns: 1026 The target Expression. 
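        Example:
            Assuming `tokens` came from Tokenizer().tokenize("a AND b"),
            parse_into(exp.Condition, tokens) dispatches to the exp.Condition
            entry in EXPRESSION_PARSERS and returns a list containing the
            parsed condition tree.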
1027 """ 1028 errors = [] 1029 for expression_type in ensure_list(expression_types): 1030 parser = self.EXPRESSION_PARSERS.get(expression_type) 1031 if not parser: 1032 raise TypeError(f"No parser registered for {expression_type}") 1033 1034 try: 1035 return self._parse(parser, raw_tokens, sql) 1036 except ParseError as e: 1037 e.errors[0]["into_expression"] = expression_type 1038 errors.append(e) 1039 1040 raise ParseError( 1041 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1042 errors=merge_errors(errors), 1043 ) from errors[-1] 1044 1045 def _parse( 1046 self, 1047 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1048 raw_tokens: t.List[Token], 1049 sql: t.Optional[str] = None, 1050 ) -> t.List[t.Optional[exp.Expression]]: 1051 self.reset() 1052 self.sql = sql or "" 1053 1054 total = len(raw_tokens) 1055 chunks: t.List[t.List[Token]] = [[]] 1056 1057 for i, token in enumerate(raw_tokens): 1058 if token.token_type == TokenType.SEMICOLON: 1059 if i < total - 1: 1060 chunks.append([]) 1061 else: 1062 chunks[-1].append(token) 1063 1064 expressions = [] 1065 1066 for tokens in chunks: 1067 self._index = -1 1068 self._tokens = tokens 1069 self._advance() 1070 1071 expressions.append(parse_method(self)) 1072 1073 if self._index < len(self._tokens): 1074 self.raise_error("Invalid expression / Unexpected token") 1075 1076 self.check_errors() 1077 1078 return expressions 1079 1080 def check_errors(self) -> None: 1081 """Logs or raises any found errors, depending on the chosen error level setting.""" 1082 if self.error_level == ErrorLevel.WARN: 1083 for error in self.errors: 1084 logger.error(str(error)) 1085 elif self.error_level == ErrorLevel.RAISE and self.errors: 1086 raise ParseError( 1087 concat_messages(self.errors, self.max_errors), 1088 errors=merge_errors(self.errors), 1089 ) 1090 1091 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1092 """ 1093 Appends an error in the list of recorded errors or raises it, depending on the chosen 1094 error level setting. 1095 """ 1096 token = token or self._curr or self._prev or Token.string("") 1097 start = token.start 1098 end = token.end + 1 1099 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1100 highlight = self.sql[start:end] 1101 end_context = self.sql[end : end + self.error_message_context] 1102 1103 error = ParseError.new( 1104 f"{message}. Line {token.line}, Col: {token.col}.\n" 1105 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1106 description=message, 1107 line=token.line, 1108 col=token.col, 1109 start_context=start_context, 1110 highlight=highlight, 1111 end_context=end_context, 1112 ) 1113 1114 if self.error_level == ErrorLevel.IMMEDIATE: 1115 raise error 1116 1117 self.errors.append(error) 1118 1119 def expression( 1120 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1121 ) -> E: 1122 """ 1123 Creates a new, validated Expression. 1124 1125 Args: 1126 exp_class: The expression class to instantiate. 1127 comments: An optional list of comments to attach to the expression. 1128 kwargs: The arguments to set for the expression along with their respective values. 1129 1130 Returns: 1131 The target expression. 
1132 """ 1133 instance = exp_class(**kwargs) 1134 instance.add_comments(comments) if comments else self._add_comments(instance) 1135 return self.validate_expression(instance) 1136 1137 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1138 if expression and self._prev_comments: 1139 expression.add_comments(self._prev_comments) 1140 self._prev_comments = None 1141 1142 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1143 """ 1144 Validates an Expression, making sure that all its mandatory arguments are set. 1145 1146 Args: 1147 expression: The expression to validate. 1148 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1149 1150 Returns: 1151 The validated expression. 1152 """ 1153 if self.error_level != ErrorLevel.IGNORE: 1154 for error_message in expression.error_messages(args): 1155 self.raise_error(error_message) 1156 1157 return expression 1158 1159 def _find_sql(self, start: Token, end: Token) -> str: 1160 return self.sql[start.start : end.end + 1] 1161 1162 def _advance(self, times: int = 1) -> None: 1163 self._index += times 1164 self._curr = seq_get(self._tokens, self._index) 1165 self._next = seq_get(self._tokens, self._index + 1) 1166 1167 if self._index > 0: 1168 self._prev = self._tokens[self._index - 1] 1169 self._prev_comments = self._prev.comments 1170 else: 1171 self._prev = None 1172 self._prev_comments = None 1173 1174 def _retreat(self, index: int) -> None: 1175 if index != self._index: 1176 self._advance(index - self._index) 1177 1178 def _parse_command(self) -> exp.Command: 1179 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1180 1181 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1182 start = self._prev 1183 exists = self._parse_exists() if allow_exists else None 1184 1185 self._match(TokenType.ON) 1186 1187 kind = self._match_set(self.CREATABLES) and self._prev 1188 if not kind: 1189 return self._parse_as_command(start) 1190 1191 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1192 this = self._parse_user_defined_function(kind=kind.token_type) 1193 elif kind.token_type == TokenType.TABLE: 1194 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1195 elif kind.token_type == TokenType.COLUMN: 1196 this = self._parse_column() 1197 else: 1198 this = self._parse_id_var() 1199 1200 self._match(TokenType.IS) 1201 1202 return self.expression( 1203 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1204 ) 1205 1206 def _parse_to_table( 1207 self, 1208 ) -> exp.ToTableProperty: 1209 table = self._parse_table_parts(schema=True) 1210 return self.expression(exp.ToTableProperty, this=table) 1211 1212 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1213 def _parse_ttl(self) -> exp.Expression: 1214 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1215 this = self._parse_bitwise() 1216 1217 if self._match_text_seq("DELETE"): 1218 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1219 if self._match_text_seq("RECOMPRESS"): 1220 return self.expression( 1221 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1222 ) 1223 if self._match_text_seq("TO", "DISK"): 1224 return self.expression( 1225 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1226 ) 1227 if self._match_text_seq("TO", "VOLUME"): 1228 return self.expression( 1229 
exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1230 ) 1231 1232 return this 1233 1234 expressions = self._parse_csv(_parse_ttl_action) 1235 where = self._parse_where() 1236 group = self._parse_group() 1237 1238 aggregates = None 1239 if group and self._match(TokenType.SET): 1240 aggregates = self._parse_csv(self._parse_set_item) 1241 1242 return self.expression( 1243 exp.MergeTreeTTL, 1244 expressions=expressions, 1245 where=where, 1246 group=group, 1247 aggregates=aggregates, 1248 ) 1249 1250 def _parse_statement(self) -> t.Optional[exp.Expression]: 1251 if self._curr is None: 1252 return None 1253 1254 if self._match_set(self.STATEMENT_PARSERS): 1255 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1256 1257 if self._match_set(Tokenizer.COMMANDS): 1258 return self._parse_command() 1259 1260 expression = self._parse_expression() 1261 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1262 return self._parse_query_modifiers(expression) 1263 1264 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1265 start = self._prev 1266 temporary = self._match(TokenType.TEMPORARY) 1267 materialized = self._match_text_seq("MATERIALIZED") 1268 1269 kind = self._match_set(self.CREATABLES) and self._prev.text 1270 if not kind: 1271 return self._parse_as_command(start) 1272 1273 return self.expression( 1274 exp.Drop, 1275 comments=start.comments, 1276 exists=exists or self._parse_exists(), 1277 this=self._parse_table(schema=True), 1278 kind=kind, 1279 temporary=temporary, 1280 materialized=materialized, 1281 cascade=self._match_text_seq("CASCADE"), 1282 constraints=self._match_text_seq("CONSTRAINTS"), 1283 purge=self._match_text_seq("PURGE"), 1284 ) 1285 1286 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1287 return ( 1288 self._match_text_seq("IF") 1289 and (not not_ or self._match(TokenType.NOT)) 1290 and self._match(TokenType.EXISTS) 1291 ) 1292 1293 def _parse_create(self) -> exp.Create | exp.Command: 1294 # Note: this can't be None because we've matched a statement parser 1295 start = self._prev 1296 comments = self._prev_comments 1297 1298 replace = start.text.upper() == "REPLACE" or self._match_pair( 1299 TokenType.OR, TokenType.REPLACE 1300 ) 1301 unique = self._match(TokenType.UNIQUE) 1302 1303 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1304 self._advance() 1305 1306 properties = None 1307 create_token = self._match_set(self.CREATABLES) and self._prev 1308 1309 if not create_token: 1310 # exp.Properties.Location.POST_CREATE 1311 properties = self._parse_properties() 1312 create_token = self._match_set(self.CREATABLES) and self._prev 1313 1314 if not properties or not create_token: 1315 return self._parse_as_command(start) 1316 1317 exists = self._parse_exists(not_=True) 1318 this = None 1319 expression: t.Optional[exp.Expression] = None 1320 indexes = None 1321 no_schema_binding = None 1322 begin = None 1323 end = None 1324 clone = None 1325 1326 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1327 nonlocal properties 1328 if properties and temp_props: 1329 properties.expressions.extend(temp_props.expressions) 1330 elif temp_props: 1331 properties = temp_props 1332 1333 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1334 this = self._parse_user_defined_function(kind=create_token.token_type) 1335 1336 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1337 
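            # For example, in CREATE FUNCTION f(x INT) RETURNS INT LANGUAGE SQL,
            # the RETURNS and LANGUAGE clauses are picked up here as
            # POST_SCHEMA properties.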
extend_props(self._parse_properties()) 1338 1339 self._match(TokenType.ALIAS) 1340 1341 if self._match(TokenType.COMMAND): 1342 expression = self._parse_as_command(self._prev) 1343 else: 1344 begin = self._match(TokenType.BEGIN) 1345 return_ = self._match_text_seq("RETURN") 1346 1347 if self._match(TokenType.STRING, advance=False): 1348 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1349 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1350 expression = self._parse_string() 1351 extend_props(self._parse_properties()) 1352 else: 1353 expression = self._parse_statement() 1354 1355 end = self._match_text_seq("END") 1356 1357 if return_: 1358 expression = self.expression(exp.Return, this=expression) 1359 elif create_token.token_type == TokenType.INDEX: 1360 this = self._parse_index(index=self._parse_id_var()) 1361 elif create_token.token_type in self.DB_CREATABLES: 1362 table_parts = self._parse_table_parts(schema=True) 1363 1364 # exp.Properties.Location.POST_NAME 1365 self._match(TokenType.COMMA) 1366 extend_props(self._parse_properties(before=True)) 1367 1368 this = self._parse_schema(this=table_parts) 1369 1370 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1371 extend_props(self._parse_properties()) 1372 1373 self._match(TokenType.ALIAS) 1374 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1375 # exp.Properties.Location.POST_ALIAS 1376 extend_props(self._parse_properties()) 1377 1378 expression = self._parse_ddl_select() 1379 1380 if create_token.token_type == TokenType.TABLE: 1381 # exp.Properties.Location.POST_EXPRESSION 1382 extend_props(self._parse_properties()) 1383 1384 indexes = [] 1385 while True: 1386 index = self._parse_index() 1387 1388 # exp.Properties.Location.POST_INDEX 1389 extend_props(self._parse_properties()) 1390 1391 if not index: 1392 break 1393 else: 1394 self._match(TokenType.COMMA) 1395 indexes.append(index) 1396 elif create_token.token_type == TokenType.VIEW: 1397 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1398 no_schema_binding = True 1399 1400 shallow = self._match_text_seq("SHALLOW") 1401 1402 if self._match_texts(self.CLONE_KEYWORDS): 1403 copy = self._prev.text.lower() == "copy" 1404 clone = self._parse_table(schema=True) 1405 when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper() 1406 clone_kind = ( 1407 self._match(TokenType.L_PAREN) 1408 and self._match_texts(self.CLONE_KINDS) 1409 and self._prev.text.upper() 1410 ) 1411 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1412 self._match(TokenType.R_PAREN) 1413 clone = self.expression( 1414 exp.Clone, 1415 this=clone, 1416 when=when, 1417 kind=clone_kind, 1418 shallow=shallow, 1419 expression=clone_expression, 1420 copy=copy, 1421 ) 1422 1423 return self.expression( 1424 exp.Create, 1425 comments=comments, 1426 this=this, 1427 kind=create_token.text, 1428 replace=replace, 1429 unique=unique, 1430 expression=expression, 1431 exists=exists, 1432 properties=properties, 1433 indexes=indexes, 1434 no_schema_binding=no_schema_binding, 1435 begin=begin, 1436 end=end, 1437 clone=clone, 1438 ) 1439 1440 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1441 # only used for teradata currently 1442 self._match(TokenType.COMMA) 1443 1444 kwargs = { 1445 "no": self._match_text_seq("NO"), 1446 "dual": self._match_text_seq("DUAL"), 1447 "before": self._match_text_seq("BEFORE"), 1448 "default": self._match_text_seq("DEFAULT"), 
1449 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1450 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1451 "after": self._match_text_seq("AFTER"), 1452 "minimum": self._match_texts(("MIN", "MINIMUM")), 1453 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1454 } 1455 1456 if self._match_texts(self.PROPERTY_PARSERS): 1457 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1458 try: 1459 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1460 except TypeError: 1461 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1462 1463 return None 1464 1465 def _parse_property(self) -> t.Optional[exp.Expression]: 1466 if self._match_texts(self.PROPERTY_PARSERS): 1467 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1468 1469 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1470 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1471 1472 if self._match_text_seq("COMPOUND", "SORTKEY"): 1473 return self._parse_sortkey(compound=True) 1474 1475 if self._match_text_seq("SQL", "SECURITY"): 1476 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1477 1478 index = self._index 1479 key = self._parse_column() 1480 1481 if not self._match(TokenType.EQ): 1482 self._retreat(index) 1483 return None 1484 1485 return self.expression( 1486 exp.Property, 1487 this=key.to_dot() if isinstance(key, exp.Column) else key, 1488 value=self._parse_column() or self._parse_var(any_token=True), 1489 ) 1490 1491 def _parse_stored(self) -> exp.FileFormatProperty: 1492 self._match(TokenType.ALIAS) 1493 1494 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1495 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1496 1497 return self.expression( 1498 exp.FileFormatProperty, 1499 this=self.expression( 1500 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1501 ) 1502 if input_format or output_format 1503 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1504 ) 1505 1506 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1507 self._match(TokenType.EQ) 1508 self._match(TokenType.ALIAS) 1509 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1510 1511 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1512 properties = [] 1513 while True: 1514 if before: 1515 prop = self._parse_property_before() 1516 else: 1517 prop = self._parse_property() 1518 1519 if not prop: 1520 break 1521 for p in ensure_list(prop): 1522 properties.append(p) 1523 1524 if properties: 1525 return self.expression(exp.Properties, expressions=properties) 1526 1527 return None 1528 1529 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1530 return self.expression( 1531 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1532 ) 1533 1534 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1535 if self._index >= 2: 1536 pre_volatile_token = self._tokens[self._index - 2] 1537 else: 1538 pre_volatile_token = None 1539 1540 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1541 return exp.VolatileProperty() 1542 1543 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1544 1545 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1546 
    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)
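    # The parsers below cover Teradata-style CREATE TABLE options such as
    # FREESPACE, MERGEBLOCKRATIO, DATABLOCKSIZE and BLOCKCOMPRESSION
    # (illustrative shapes, per Teradata docs: "FREESPACE = 10 PERCENT",
    # "DATABLOCKSIZE = 16384 BYTES").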
    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
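    # Illustrative (Teradata locking modifier, an assumption about the input
    # shape): "LOCKING TABLE t FOR ACCESS" would yield
    # exp.LockingProperty(kind="TABLE", for_or_in="FOR", lock_type="ACCESS")
    # given the matching logic above.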
    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
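    # Illustrative (Postgres declarative partitioning, per the link above):
    # "PARTITION OF parent FOR VALUES FROM (MINVALUE) TO (10)" parses into
    # exp.PartitionedOfProperty wrapping an exp.PartitionBoundSpec with
    # from_expressions/to_expressions.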
    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )
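    # Both Postgres-style "ON CONFLICT ... DO NOTHING/UPDATE SET ..." and
    # MySQL-style "ON DUPLICATE KEY UPDATE ..." are normalized into a single
    # exp.OnConflict node by the parser below.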
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
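    # Hive's "LOAD DATA [LOCAL] INPATH '...' [OVERWRITE] INTO TABLE t" is
    # handled structurally by _parse_load below; any other LOAD statement
    # falls back to an opaque exp.Command.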
    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )
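    # Illustrative: each parenthesized row in "VALUES (1, 2), (3, 4)" becomes
    # an exp.Tuple via _parse_value below, while Presto-style bare
    # "VALUES 1, 2" yields one single-element tuple per row.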
    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In Presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # DuckDB supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)
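    # WITH clauses are parsed ahead of the statement they modify: _parse_select
    # above builds the exp.With first (via _parse_with below) and then attaches
    # it to the parsed statement's "with" arg.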
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )
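    # MATCH_RECOGNIZE implements row-pattern matching (found in e.g. Oracle and
    # Snowflake); the parser below captures the PATTERN body verbatim between
    # its balanced parentheses rather than building an AST for the regex.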
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this
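    # Illustrative: "a LEFT OUTER JOIN b ON a.id = b.id" produces an exp.Join
    # whose side/kind args come from JOIN_SIDES/JOIN_KINDS matched in
    # _parse_join above (here side="LEFT", kind="OUTER"), with the ON
    # condition stored under "on".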
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
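    # Illustrative: "catalog.db.t" fills the catalog/db/this args of exp.Table
    # in _parse_table_parts above; any further dotted parts are nested as
    # exp.Dot expressions.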
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
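    # TABLESAMPLE grammars vary widely across dialects; the parser below
    # accepts, for example, "TABLESAMPLE (10 PERCENT)" (T-SQL),
    # "TABLESAMPLE (5 ROWS)", Hive's "BUCKET x OUT OF y ON col", and DuckDB's
    # "USING SAMPLE" modifier form.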
    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )
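    # Illustrative (Snowflake/Spark-style PIVOT handled by _parse_pivot below):
    #   SELECT * FROM t PIVOT(SUM(v) FOR k IN ('a', 'b'))
    # yields an exp.Pivot with the aggregation list in "expressions", the IN
    # list in "field", and unpivot=False.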
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)
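    # Illustrative: "GROUP BY a, ROLLUP (b), GROUPING SETS ((a, b), ())"
    # accumulates into the exp.Group args "expressions", "rollup" and
    # "grouping_sets" via the loop in _parse_group above.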
    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
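    # LIMIT, T-SQL's TOP, and ANSI "FETCH FIRST ... ROWS ONLY" are all
    # funnelled through _parse_limit below; the MySQL "LIMIT offset, count"
    # comma form is also handled there.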
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            comments=self._prev.comments,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )
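    # Illustrative: "SELECT 1 UNION SELECT 2" sets distinct=True on the
    # exp.Union node, whereas "SELECT 1 UNION ALL SELECT 2" sets
    # distinct=False, per the distinct/ALL matching above.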
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
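    # INTERVAL literals arrive in several shapes (INTERVAL '5' DAY,
    # INTERVAL '5 day', INTERVAL 5 DAY); _parse_interval below canonicalizes
    # them so the quantity and unit can be transpiled uniformly.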
a "window side") 3375 unit = None 3376 self._retreat(self._index - 1) 3377 3378 this = exp.Literal.string(parts[0]) 3379 unit = self.expression(exp.Var, this=parts[1]) 3380 3381 return self.expression(exp.Interval, this=this, unit=unit) 3382 3383 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3384 this = self._parse_term() 3385 3386 while True: 3387 if self._match_set(self.BITWISE): 3388 this = self.expression( 3389 self.BITWISE[self._prev.token_type], 3390 this=this, 3391 expression=self._parse_term(), 3392 ) 3393 elif self._match(TokenType.DQMARK): 3394 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3395 elif self._match_pair(TokenType.LT, TokenType.LT): 3396 this = self.expression( 3397 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3398 ) 3399 elif self._match_pair(TokenType.GT, TokenType.GT): 3400 this = self.expression( 3401 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3402 ) 3403 else: 3404 break 3405 3406 return this 3407 3408 def _parse_term(self) -> t.Optional[exp.Expression]: 3409 return self._parse_tokens(self._parse_factor, self.TERM) 3410 3411 def _parse_factor(self) -> t.Optional[exp.Expression]: 3412 if self.EXPONENT: 3413 factor = self._parse_tokens(self._parse_exponent, self.FACTOR) 3414 else: 3415 factor = self._parse_tokens(self._parse_unary, self.FACTOR) 3416 if isinstance(factor, exp.Div): 3417 factor.args["typed"] = self.TYPED_DIVISION 3418 factor.args["safe"] = self.SAFE_DIVISION 3419 return factor 3420 3421 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3422 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3423 3424 def _parse_unary(self) -> t.Optional[exp.Expression]: 3425 if self._match_set(self.UNARY_PARSERS): 3426 return self.UNARY_PARSERS[self._prev.token_type](self) 3427 return self._parse_at_time_zone(self._parse_type()) 3428 3429 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3430 interval = parse_interval and self._parse_interval() 3431 if interval: 3432 return interval 3433 3434 index = self._index 3435 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3436 this = self._parse_column() 3437 3438 if data_type: 3439 if isinstance(this, exp.Literal): 3440 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3441 if parser: 3442 return parser(self, this, data_type) 3443 return self.expression(exp.Cast, this=this, to=data_type) 3444 if not data_type.expressions: 3445 self._retreat(index) 3446 return self._parse_column() 3447 return self._parse_column_ops(data_type) 3448 3449 return this and self._parse_column_ops(this) 3450 3451 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3452 this = self._parse_type() 3453 if not this: 3454 return None 3455 3456 return self.expression( 3457 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3458 ) 3459 3460 def _parse_types( 3461 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3462 ) -> t.Optional[exp.Expression]: 3463 index = self._index 3464 3465 prefix = self._match_text_seq("SYSUDTLIB", ".") 3466 3467 if not self._match_set(self.TYPE_TOKENS): 3468 identifier = allow_identifiers and self._parse_id_var( 3469 any_token=False, tokens=(TokenType.VAR,) 3470 ) 3471 3472 if identifier: 3473 tokens = self._tokenizer.tokenize(identifier.name) 3474 3475 if len(tokens) != 1: 3476 self.raise_error("Unexpected identifier", self._prev) 3477 3478 if tokens[0].token_type in self.TYPE_TOKENS: 3479 self._prev = 
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token
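
            # Illustration: for MySQL `INT UNSIGNED`, TokenType.INT is remapped to
            # its unsigned counterpart here, so the exp.DataType built below
            # carries the unsigned type.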
            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
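        # Illustration: NO_PAREN_FUNCTION_PARSERS dispatches keyword-style
        # constructs that may appear without parentheses (e.g. CASE), while
        # NO_PAREN_FUNCTIONS below covers zero-argument builtins such as
        # CURRENT_DATE.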
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
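
        # Illustration: T-SQL's temporal `GENERATED ALWAYS AS ROW START` takes the
        # branch below, while `GENERATED BY DEFAULT AS IDENTITY (START WITH 1
        # INCREMENT BY 1)` falls through to the IDENTITY handling further down.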
self._match_text_seq("ROW"): 3980 start = self._match_text_seq("START") 3981 if not start: 3982 self._match(TokenType.END) 3983 hidden = self._match_text_seq("HIDDEN") 3984 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 3985 3986 identity = self._match_text_seq("IDENTITY") 3987 3988 if self._match(TokenType.L_PAREN): 3989 if self._match(TokenType.START_WITH): 3990 this.set("start", self._parse_bitwise()) 3991 if self._match_text_seq("INCREMENT", "BY"): 3992 this.set("increment", self._parse_bitwise()) 3993 if self._match_text_seq("MINVALUE"): 3994 this.set("minvalue", self._parse_bitwise()) 3995 if self._match_text_seq("MAXVALUE"): 3996 this.set("maxvalue", self._parse_bitwise()) 3997 3998 if self._match_text_seq("CYCLE"): 3999 this.set("cycle", True) 4000 elif self._match_text_seq("NO", "CYCLE"): 4001 this.set("cycle", False) 4002 4003 if not identity: 4004 this.set("expression", self._parse_bitwise()) 4005 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4006 args = self._parse_csv(self._parse_bitwise) 4007 this.set("start", seq_get(args, 0)) 4008 this.set("increment", seq_get(args, 1)) 4009 4010 self._match_r_paren() 4011 4012 return this 4013 4014 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4015 self._match_text_seq("LENGTH") 4016 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4017 4018 def _parse_not_constraint( 4019 self, 4020 ) -> t.Optional[exp.Expression]: 4021 if self._match_text_seq("NULL"): 4022 return self.expression(exp.NotNullColumnConstraint) 4023 if self._match_text_seq("CASESPECIFIC"): 4024 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4025 if self._match_text_seq("FOR", "REPLICATION"): 4026 return self.expression(exp.NotForReplicationColumnConstraint) 4027 return None 4028 4029 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4030 if self._match(TokenType.CONSTRAINT): 4031 this = self._parse_id_var() 4032 else: 4033 this = None 4034 4035 if self._match_texts(self.CONSTRAINT_PARSERS): 4036 return self.expression( 4037 exp.ColumnConstraint, 4038 this=this, 4039 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4040 ) 4041 4042 return this 4043 4044 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4045 if not self._match(TokenType.CONSTRAINT): 4046 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4047 4048 this = self._parse_id_var() 4049 expressions = [] 4050 4051 while True: 4052 constraint = self._parse_unnamed_constraint() or self._parse_function() 4053 if not constraint: 4054 break 4055 expressions.append(constraint) 4056 4057 return self.expression(exp.Constraint, this=this, expressions=expressions) 4058 4059 def _parse_unnamed_constraint( 4060 self, constraints: t.Optional[t.Collection[str]] = None 4061 ) -> t.Optional[exp.Expression]: 4062 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4063 constraints or self.CONSTRAINT_PARSERS 4064 ): 4065 return None 4066 4067 constraint = self._prev.text.upper() 4068 if constraint not in self.CONSTRAINT_PARSERS: 4069 self.raise_error(f"No parser found for schema constraint {constraint}.") 4070 4071 return self.CONSTRAINT_PARSERS[constraint](self) 4072 4073 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4074 self._match_text_seq("KEY") 4075 return self.expression( 4076 exp.UniqueColumnConstraint, 4077 this=self._parse_schema(self._parse_id_var(any_token=False)), 4078 
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint:
        self._match(TokenType.TIMESTAMP_SNAPSHOT)

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
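        # Illustration: `PRIMARY KEY (a, b) NOT ENFORCED` produces an
        # exp.PrimaryKey with expressions [a, b] and options ["NOT ENFORCED"].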
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
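        # Illustration: CONCAT('a') is therefore parsed as just the literal 'a'.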
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
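
        For example, DECODE(x, 1, 'one', NULL, 'none', 'other') is parsed as
        CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END.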
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None
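
        # Illustration: MATCH (title, body) AGAINST ('term' IN BOOLEAN MODE)
        # yields an exp.MatchAgainst whose modifier is "IN BOOLEAN MODE".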
        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

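    # Illustration: in `SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)`,
    # the WINDOW clause is handled by the parsers below, which reuse _parse_window
    # with alias=True for each named window.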
    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER;
        # some dialects choose to implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The code below handles
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
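        # Illustration: a named window `w AS (PARTITION BY y)` enters here with
        # alias=True, so AS is consumed below instead of an OVER keyword.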
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()
    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None
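    # Illustration: BigQuery-style star modifiers such as `SELECT * EXCEPT (b)
    # FROM t` and `SELECT * REPLACE (a + 1 AS a) FROM t` go through the two
    # parsers above.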

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)
https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5066 if self._match_texts(("FIRST", "AFTER")): 5067 position = self._prev.text 5068 column_position = self.expression( 5069 exp.ColumnPosition, this=self._parse_column(), position=position 5070 ) 5071 expression.set("position", column_position) 5072 5073 return expression 5074 5075 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5076 drop = self._match(TokenType.DROP) and self._parse_drop() 5077 if drop and not isinstance(drop, exp.Command): 5078 drop.set("kind", drop.args.get("kind", "COLUMN")) 5079 return drop 5080 5081 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5082 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5083 return self.expression( 5084 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5085 ) 5086 5087 def _parse_add_constraint(self) -> exp.AddConstraint: 5088 this = None 5089 kind = self._prev.token_type 5090 5091 if kind == TokenType.CONSTRAINT: 5092 this = self._parse_id_var() 5093 5094 if self._match_text_seq("CHECK"): 5095 expression = self._parse_wrapped(self._parse_conjunction) 5096 enforced = self._match_text_seq("ENFORCED") 5097 5098 return self.expression( 5099 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5100 ) 5101 5102 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5103 expression = self._parse_foreign_key() 5104 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5105 expression = self._parse_primary_key() 5106 else: 5107 expression = None 5108 5109 return self.expression(exp.AddConstraint, this=this, expression=expression) 5110 5111 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5112 index = self._index - 1 5113 5114 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5115 return self._parse_csv(self._parse_add_constraint) 5116 5117 self._retreat(index) 5118 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 5119 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5120 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5121 5122 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5123 self._match(TokenType.COLUMN) 5124 column = self._parse_field(any_token=True) 5125 5126 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5127 return self.expression(exp.AlterColumn, this=column, drop=True) 5128 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5129 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5130 5131 self._match_text_seq("SET", "DATA") 5132 return self.expression( 5133 exp.AlterColumn, 5134 this=column, 5135 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5136 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5137 using=self._match(TokenType.USING) and self._parse_conjunction(), 5138 ) 5139 5140 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5141 index = self._index - 1 5142 5143 partition_exists = self._parse_exists() 5144 if self._match(TokenType.PARTITION, advance=False): 5145 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5146 5147 self._retreat(index) 5148 return self._parse_csv(self._parse_drop_column) 5149 5150 def _parse_alter_table_rename(self) -> exp.RenameTable: 5151 self._match_text_seq("TO") 5152 return self.expression(exp.RenameTable, 
this=self._parse_table(schema=True)) 5153 5154 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5155 start = self._prev 5156 5157 if not self._match(TokenType.TABLE): 5158 return self._parse_as_command(start) 5159 5160 exists = self._parse_exists() 5161 only = self._match_text_seq("ONLY") 5162 this = self._parse_table(schema=True) 5163 5164 if self._next: 5165 self._advance() 5166 5167 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5168 if parser: 5169 actions = ensure_list(parser(self)) 5170 5171 if not self._curr: 5172 return self.expression( 5173 exp.AlterTable, 5174 this=this, 5175 exists=exists, 5176 actions=actions, 5177 only=only, 5178 ) 5179 5180 return self._parse_as_command(start) 5181 5182 def _parse_merge(self) -> exp.Merge: 5183 self._match(TokenType.INTO) 5184 target = self._parse_table() 5185 5186 if target and self._match(TokenType.ALIAS, advance=False): 5187 target.set("alias", self._parse_table_alias()) 5188 5189 self._match(TokenType.USING) 5190 using = self._parse_table() 5191 5192 self._match(TokenType.ON) 5193 on = self._parse_conjunction() 5194 5195 return self.expression( 5196 exp.Merge, 5197 this=target, 5198 using=using, 5199 on=on, 5200 expressions=self._parse_when_matched(), 5201 ) 5202 5203 def _parse_when_matched(self) -> t.List[exp.When]: 5204 whens = [] 5205 5206 while self._match(TokenType.WHEN): 5207 matched = not self._match(TokenType.NOT) 5208 self._match_text_seq("MATCHED") 5209 source = ( 5210 False 5211 if self._match_text_seq("BY", "TARGET") 5212 else self._match_text_seq("BY", "SOURCE") 5213 ) 5214 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5215 5216 self._match(TokenType.THEN) 5217 5218 if self._match(TokenType.INSERT): 5219 _this = self._parse_star() 5220 if _this: 5221 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5222 else: 5223 then = self.expression( 5224 exp.Insert, 5225 this=self._parse_value(), 5226 expression=self._match(TokenType.VALUES) and self._parse_value(), 5227 ) 5228 elif self._match(TokenType.UPDATE): 5229 expressions = self._parse_star() 5230 if expressions: 5231 then = self.expression(exp.Update, expressions=expressions) 5232 else: 5233 then = self.expression( 5234 exp.Update, 5235 expressions=self._match(TokenType.SET) 5236 and self._parse_csv(self._parse_equality), 5237 ) 5238 elif self._match(TokenType.DELETE): 5239 then = self.expression(exp.Var, this=self._prev.text) 5240 else: 5241 then = None 5242 5243 whens.append( 5244 self.expression( 5245 exp.When, 5246 matched=matched, 5247 source=source, 5248 condition=condition, 5249 then=then, 5250 ) 5251 ) 5252 return whens 5253 5254 def _parse_show(self) -> t.Optional[exp.Expression]: 5255 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5256 if parser: 5257 return parser(self) 5258 return self._parse_as_command(self._prev) 5259 5260 def _parse_set_item_assignment( 5261 self, kind: t.Optional[str] = None 5262 ) -> t.Optional[exp.Expression]: 5263 index = self._index 5264 5265 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5266 return self._parse_set_transaction(global_=kind == "GLOBAL") 5267 5268 left = self._parse_primary() or self._parse_id_var() 5269 assignment_delimiter = self._match_texts(("=", "TO")) 5270 5271 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5272 self._retreat(index) 5273 return None 5274 5275 right = self._parse_statement() or self._parse_id_var() 5276 this = 
self.expression(exp.EQ, this=left, expression=right) 5277 5278 return self.expression(exp.SetItem, this=this, kind=kind) 5279 5280 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5281 self._match_text_seq("TRANSACTION") 5282 characteristics = self._parse_csv( 5283 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5284 ) 5285 return self.expression( 5286 exp.SetItem, 5287 expressions=characteristics, 5288 kind="TRANSACTION", 5289 **{"global": global_}, # type: ignore 5290 ) 5291 5292 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5293 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5294 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5295 5296 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5297 index = self._index 5298 set_ = self.expression( 5299 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5300 ) 5301 5302 if self._curr: 5303 self._retreat(index) 5304 return self._parse_as_command(self._prev) 5305 5306 return set_ 5307 5308 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5309 for option in options: 5310 if self._match_text_seq(*option.split(" ")): 5311 return exp.var(option) 5312 return None 5313 5314 def _parse_as_command(self, start: Token) -> exp.Command: 5315 while self._curr: 5316 self._advance() 5317 text = self._find_sql(start, self._prev) 5318 size = len(start.text) 5319 return exp.Command(this=text[:size], expression=text[size:]) 5320 5321 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5322 settings = [] 5323 5324 self._match_l_paren() 5325 kind = self._parse_id_var() 5326 5327 if self._match(TokenType.L_PAREN): 5328 while True: 5329 key = self._parse_id_var() 5330 value = self._parse_primary() 5331 5332 if not key and value is None: 5333 break 5334 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5335 self._match(TokenType.R_PAREN) 5336 5337 self._match_r_paren() 5338 5339 return self.expression( 5340 exp.DictProperty, 5341 this=this, 5342 kind=kind.this if kind else None, 5343 settings=settings, 5344 ) 5345 5346 def _parse_dict_range(self, this: str) -> exp.DictRange: 5347 self._match_l_paren() 5348 has_min = self._match_text_seq("MIN") 5349 if has_min: 5350 min = self._parse_var() or self._parse_primary() 5351 self._match_text_seq("MAX") 5352 max = self._parse_var() or self._parse_primary() 5353 else: 5354 max = self._parse_var() or self._parse_primary() 5355 min = exp.Literal.number(0) 5356 self._match_r_paren() 5357 return self.expression(exp.DictRange, this=this, min=min, max=max) 5358 5359 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5360 index = self._index 5361 expression = self._parse_column() 5362 if not self._match(TokenType.IN): 5363 self._retreat(index - 1) 5364 return None 5365 iterator = self._parse_column() 5366 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5367 return self.expression( 5368 exp.Comprehension, 5369 this=this, 5370 expression=expression, 5371 iterator=iterator, 5372 condition=condition, 5373 ) 5374 5375 def _find_parser( 5376 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5377 ) -> t.Optional[t.Callable]: 5378 if not self._curr: 5379 return None 5380 5381 index = self._index 5382 this = [] 5383 while True: 5384 # The current token might be multiple words 5385 curr = self._curr.text.upper() 5386 key = curr.split(" ") 
5387 this.append(curr) 5388 5389 self._advance() 5390 result, trie = in_trie(trie, key) 5391 if result == TrieResult.FAILED: 5392 break 5393 5394 if result == TrieResult.EXISTS: 5395 subparser = parsers[" ".join(this)] 5396 return subparser 5397 5398 self._retreat(index) 5399 return None 5400 5401 def _match(self, token_type, advance=True, expression=None): 5402 if not self._curr: 5403 return None 5404 5405 if self._curr.token_type == token_type: 5406 if advance: 5407 self._advance() 5408 self._add_comments(expression) 5409 return True 5410 5411 return None 5412 5413 def _match_set(self, types, advance=True): 5414 if not self._curr: 5415 return None 5416 5417 if self._curr.token_type in types: 5418 if advance: 5419 self._advance() 5420 return True 5421 5422 return None 5423 5424 def _match_pair(self, token_type_a, token_type_b, advance=True): 5425 if not self._curr or not self._next: 5426 return None 5427 5428 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5429 if advance: 5430 self._advance(2) 5431 return True 5432 5433 return None 5434 5435 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5436 if not self._match(TokenType.L_PAREN, expression=expression): 5437 self.raise_error("Expecting (") 5438 5439 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5440 if not self._match(TokenType.R_PAREN, expression=expression): 5441 self.raise_error("Expecting )") 5442 5443 def _match_texts(self, texts, advance=True): 5444 if self._curr and self._curr.text.upper() in texts: 5445 if advance: 5446 self._advance() 5447 return True 5448 return False 5449 5450 def _match_text_seq(self, *texts, advance=True): 5451 index = self._index 5452 for text in texts: 5453 if self._curr and self._curr.text.upper() == text: 5454 self._advance() 5455 else: 5456 self._retreat(index) 5457 return False 5458 5459 if not advance: 5460 self._retreat(index) 5461 5462 return True 5463 5464 @t.overload 5465 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5466 ... 5467 5468 @t.overload 5469 def _replace_columns_with_dots( 5470 self, this: t.Optional[exp.Expression] 5471 ) -> t.Optional[exp.Expression]: 5472 ... 5473 5474 def _replace_columns_with_dots(self, this): 5475 if isinstance(this, exp.Dot): 5476 exp.replace_children(this, self._replace_columns_with_dots) 5477 elif isinstance(this, exp.Column): 5478 exp.replace_children(this, self._replace_columns_with_dots) 5479 table = this.args.get("table") 5480 this = ( 5481 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5482 ) 5483 5484 return this 5485 5486 def _replace_lambda( 5487 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5488 ) -> t.Optional[exp.Expression]: 5489 if not node: 5490 return node 5491 5492 for column in node.find_all(exp.Column): 5493 if column.parts[0].name in lambda_variables: 5494 dot_or_id = column.to_dot() if column.table else column.this 5495 parent = column.parent 5496 5497 while isinstance(parent, exp.Dot): 5498 if not isinstance(parent.parent, exp.Dot): 5499 parent.replace(dot_or_id) 5500 break 5501 parent = parent.parent 5502 else: 5503 if column is node: 5504 node = dot_or_id 5505 else: 5506 column.replace(dot_or_id) 5507 return node 5508 5509 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5510 return [ 5511 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5512 for value in values 5513 if value 5514 ]
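The _match* helpers above implement the parser's speculative matching: a probe advances the token cursor only when it fully succeeds, and multi-token probes roll back to the saved index via _retreat on failure. A standalone toy illustrating the idiom (TinyMatcher is hypothetical, not part of sqlglot):

    class TinyMatcher:
        """Toy cursor demonstrating the match/retreat backtracking pattern."""

        def __init__(self, tokens):
            self._tokens = tokens
            self._index = 0

        def _retreat(self, index):
            self._index = index

        def _match_text_seq(self, *texts):
            index = self._index
            for text in texts:
                if self._index < len(self._tokens) and self._tokens[self._index].upper() == text:
                    self._index += 1
                else:
                    self._retreat(index)  # roll back partial progress
                    return False
            return True

    m = TinyMatcher(["not", "materialized"])
    assert not m._match_text_seq("NOT", "NULL")      # fails; cursor restored to 0
    assert m._match_text_seq("NOT", "MATERIALIZED")  # consumes both tokens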
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
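For example, the chosen error level decides whether a malformed statement raises immediately, accumulates errors, or is tolerated. A minimal sketch (the malformed SQL string is arbitrary and the exact error messages vary by version):

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1 +"  # malformed on purpose: dangling operator

    # IGNORE skips validation and returns a best-effort tree.
    tree = Parser(error_level=ErrorLevel.IGNORE).parse(Tokenizer().tokenize(sql), sql=sql)[0]

    # RAISE accumulates errors during parsing, then raises one ParseError
    # that concatenates at most max_errors messages.
    try:
        Parser(error_level=ErrorLevel.RAISE, max_errors=3).parse(Tokenizer().tokenize(sql), sql=sql)
    except ParseError as e:
        print(e.errors)  # structured error dicts: description, line, col, ...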
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
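A typical round trip pairs this with the Tokenizer; each statement in the input yields one tree:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; SELECT b FROM u"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)

    assert len(trees) == 2
    print(trees[0].sql())  # SELECT a FROM t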
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
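As a sketch, the fallback behavior can be seen by passing a tuple of types; the first registered parser that consumes all tokens wins (which types appear in EXPRESSION_PARSERS may vary by version):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    tokens = Tokenizer().tokenize(sql)

    # exp.Order fails on this input, so parsing falls through to exp.Select.
    (tree,) = Parser().parse_into((exp.Order, exp.Select), tokens, sql=sql)
    assert isinstance(tree, exp.Select)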
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
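Since _parse invokes this at the end of every run, its two active branches are usually observed indirectly; a sketch, assuming a deliberately malformed statement:

    import logging
    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    logging.basicConfig()
    sql = "SELECT 1 +"

    # WARN: each recorded error is logged through the "sqlglot" logger.
    Parser(error_level=ErrorLevel.WARN).parse(Tokenizer().tokenize(sql), sql=sql)

    # RAISE: recorded errors are merged into a single ParseError.
    try:
        Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(sql), sql=sql)
    except ParseError as e:
        print(len(e.errors))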
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
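Subclasses implementing dialect-specific rules call this from their parse methods; a hypothetical override as illustration (StrictParser and its FROM-clause policy are invented for this sketch):

    from sqlglot import exp
    from sqlglot.parser import Parser

    class StrictParser(Parser):
        def _parse_select(self, **kwargs):
            select = super()._parse_select(**kwargs)
            # Invented policy: reject SELECTs that have no FROM clause.
            if isinstance(select, exp.Select) and not select.args.get("from"):
                self.raise_error("FROM clause is required in this dialect")
            return select

With the default ErrorLevel.IMMEDIATE the call raises at the current token; under other levels it records the error for check_errors to handle later.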
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
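Because it funnels through comment attachment and validate_expression, custom parse methods build nodes with this helper rather than instantiating Expression classes directly. A minimal sketch:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    col = parser.expression(exp.Column, this=exp.to_identifier("a"))
    print(col.sql())  # a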
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
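For example, a Cast node missing its mandatory `to` argument fails validation unless the error level is IGNORE (a sketch; the exact message text may vary by version):

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser

    incomplete = exp.Cast(this=exp.column("x"))  # `to` is required but missing

    try:
        Parser(error_level=ErrorLevel.IMMEDIATE).validate_expression(incomplete)
    except ParseError as e:
        print(e)  # e.g. Required keyword: 'to' missing for Cast

    # IGNORE skips validation entirely and returns the node unchanged.
    Parser(error_level=ErrorLevel.IGNORE).validate_expression(incomplete)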