# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 
TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.MEDIUMINT, 140 TokenType.UMEDIUMINT, 141 TokenType.FIXEDSTRING, 142 TokenType.FLOAT, 143 TokenType.DOUBLE, 144 TokenType.CHAR, 145 TokenType.NCHAR, 146 TokenType.VARCHAR, 147 TokenType.NVARCHAR, 148 TokenType.TEXT, 149 TokenType.MEDIUMTEXT, 150 TokenType.LONGTEXT, 151 TokenType.MEDIUMBLOB, 152 TokenType.LONGBLOB, 153 TokenType.BINARY, 154 TokenType.VARBINARY, 155 TokenType.JSON, 156 TokenType.JSONB, 157 TokenType.INTERVAL, 158 TokenType.TINYBLOB, 159 TokenType.TINYTEXT, 160 TokenType.TIME, 161 TokenType.TIMETZ, 162 TokenType.TIMESTAMP, 163 TokenType.TIMESTAMP_S, 164 TokenType.TIMESTAMP_MS, 165 TokenType.TIMESTAMP_NS, 166 TokenType.TIMESTAMPTZ, 167 TokenType.TIMESTAMPLTZ, 168 TokenType.DATETIME, 169 TokenType.DATETIME64, 170 TokenType.DATE, 171 TokenType.INT4RANGE, 172 TokenType.INT4MULTIRANGE, 173 TokenType.INT8RANGE, 174 TokenType.INT8MULTIRANGE, 175 TokenType.NUMRANGE, 176 TokenType.NUMMULTIRANGE, 177 TokenType.TSRANGE, 178 TokenType.TSMULTIRANGE, 179 TokenType.TSTZRANGE, 180 TokenType.TSTZMULTIRANGE, 181 TokenType.DATERANGE, 182 TokenType.DATEMULTIRANGE, 183 TokenType.DECIMAL, 184 TokenType.UDECIMAL, 185 TokenType.BIGDECIMAL, 186 TokenType.UUID, 187 TokenType.GEOGRAPHY, 188 TokenType.GEOMETRY, 189 TokenType.HLLSKETCH, 190 TokenType.HSTORE, 191 TokenType.PSEUDO_TYPE, 192 TokenType.SUPER, 193 TokenType.SERIAL, 194 TokenType.SMALLSERIAL, 195 TokenType.BIGSERIAL, 196 TokenType.XML, 197 TokenType.YEAR, 198 TokenType.UNIQUEIDENTIFIER, 199 TokenType.USERDEFINED, 200 TokenType.MONEY, 201 TokenType.SMALLMONEY, 202 TokenType.ROWVERSION, 203 TokenType.IMAGE, 204 TokenType.VARIANT, 205 TokenType.OBJECT, 206 TokenType.OBJECT_IDENTIFIER, 207 TokenType.INET, 208 TokenType.IPADDRESS, 209 TokenType.IPPREFIX, 210 TokenType.UNKNOWN, 211 TokenType.NULL, 212 *ENUM_TYPE_TOKENS, 213 *NESTED_TYPE_TOKENS, 214 } 215 216 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 217 TokenType.BIGINT: TokenType.UBIGINT, 218 
TokenType.INT: TokenType.UINT, 219 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 220 TokenType.SMALLINT: TokenType.USMALLINT, 221 TokenType.TINYINT: TokenType.UTINYINT, 222 TokenType.DECIMAL: TokenType.UDECIMAL, 223 } 224 225 SUBQUERY_PREDICATES = { 226 TokenType.ANY: exp.Any, 227 TokenType.ALL: exp.All, 228 TokenType.EXISTS: exp.Exists, 229 TokenType.SOME: exp.Any, 230 } 231 232 RESERVED_KEYWORDS = { 233 *Tokenizer.SINGLE_TOKENS.values(), 234 TokenType.SELECT, 235 } 236 237 DB_CREATABLES = { 238 TokenType.DATABASE, 239 TokenType.SCHEMA, 240 TokenType.TABLE, 241 TokenType.VIEW, 242 TokenType.MODEL, 243 TokenType.DICTIONARY, 244 } 245 246 CREATABLES = { 247 TokenType.COLUMN, 248 TokenType.FUNCTION, 249 TokenType.INDEX, 250 TokenType.PROCEDURE, 251 *DB_CREATABLES, 252 } 253 254 # Tokens that can represent identifiers 255 ID_VAR_TOKENS = { 256 TokenType.VAR, 257 TokenType.ANTI, 258 TokenType.APPLY, 259 TokenType.ASC, 260 TokenType.AUTO_INCREMENT, 261 TokenType.BEGIN, 262 TokenType.CACHE, 263 TokenType.CASE, 264 TokenType.COLLATE, 265 TokenType.COMMAND, 266 TokenType.COMMENT, 267 TokenType.COMMIT, 268 TokenType.CONSTRAINT, 269 TokenType.DEFAULT, 270 TokenType.DELETE, 271 TokenType.DESC, 272 TokenType.DESCRIBE, 273 TokenType.DICTIONARY, 274 TokenType.DIV, 275 TokenType.END, 276 TokenType.EXECUTE, 277 TokenType.ESCAPE, 278 TokenType.FALSE, 279 TokenType.FIRST, 280 TokenType.FILTER, 281 TokenType.FORMAT, 282 TokenType.FULL, 283 TokenType.IS, 284 TokenType.ISNULL, 285 TokenType.INTERVAL, 286 TokenType.KEEP, 287 TokenType.KILL, 288 TokenType.LEFT, 289 TokenType.LOAD, 290 TokenType.MERGE, 291 TokenType.NATURAL, 292 TokenType.NEXT, 293 TokenType.OFFSET, 294 TokenType.ORDINALITY, 295 TokenType.OVERLAPS, 296 TokenType.OVERWRITE, 297 TokenType.PARTITION, 298 TokenType.PERCENT, 299 TokenType.PIVOT, 300 TokenType.PRAGMA, 301 TokenType.RANGE, 302 TokenType.REFERENCES, 303 TokenType.RIGHT, 304 TokenType.ROW, 305 TokenType.ROWS, 306 TokenType.SEMI, 307 TokenType.SET, 308 
TokenType.SETTINGS, 309 TokenType.SHOW, 310 TokenType.TEMPORARY, 311 TokenType.TOP, 312 TokenType.TRUE, 313 TokenType.UNIQUE, 314 TokenType.UNPIVOT, 315 TokenType.UPDATE, 316 TokenType.VOLATILE, 317 TokenType.WINDOW, 318 *CREATABLES, 319 *SUBQUERY_PREDICATES, 320 *TYPE_TOKENS, 321 *NO_PAREN_FUNCTIONS, 322 } 323 324 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 325 326 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 327 TokenType.ANTI, 328 TokenType.APPLY, 329 TokenType.ASOF, 330 TokenType.FULL, 331 TokenType.LEFT, 332 TokenType.LOCK, 333 TokenType.NATURAL, 334 TokenType.OFFSET, 335 TokenType.RIGHT, 336 TokenType.SEMI, 337 TokenType.WINDOW, 338 } 339 340 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 341 342 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 343 344 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 345 346 FUNC_TOKENS = { 347 TokenType.COLLATE, 348 TokenType.COMMAND, 349 TokenType.CURRENT_DATE, 350 TokenType.CURRENT_DATETIME, 351 TokenType.CURRENT_TIMESTAMP, 352 TokenType.CURRENT_TIME, 353 TokenType.CURRENT_USER, 354 TokenType.FILTER, 355 TokenType.FIRST, 356 TokenType.FORMAT, 357 TokenType.GLOB, 358 TokenType.IDENTIFIER, 359 TokenType.INDEX, 360 TokenType.ISNULL, 361 TokenType.ILIKE, 362 TokenType.INSERT, 363 TokenType.LIKE, 364 TokenType.MERGE, 365 TokenType.OFFSET, 366 TokenType.PRIMARY_KEY, 367 TokenType.RANGE, 368 TokenType.REPLACE, 369 TokenType.RLIKE, 370 TokenType.ROW, 371 TokenType.UNNEST, 372 TokenType.VAR, 373 TokenType.LEFT, 374 TokenType.RIGHT, 375 TokenType.DATE, 376 TokenType.DATETIME, 377 TokenType.TABLE, 378 TokenType.TIMESTAMP, 379 TokenType.TIMESTAMPTZ, 380 TokenType.WINDOW, 381 TokenType.XOR, 382 *TYPE_TOKENS, 383 *SUBQUERY_PREDICATES, 384 } 385 386 CONJUNCTION = { 387 TokenType.AND: exp.And, 388 TokenType.OR: exp.Or, 389 } 390 391 EQUALITY = { 392 TokenType.EQ: exp.EQ, 393 TokenType.NEQ: exp.NEQ, 394 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 395 } 396 397 COMPARISON = { 398 TokenType.GT: exp.GT, 399 TokenType.GTE: 
exp.GTE, 400 TokenType.LT: exp.LT, 401 TokenType.LTE: exp.LTE, 402 } 403 404 BITWISE = { 405 TokenType.AMP: exp.BitwiseAnd, 406 TokenType.CARET: exp.BitwiseXor, 407 TokenType.PIPE: exp.BitwiseOr, 408 TokenType.DPIPE: exp.DPipe, 409 } 410 411 TERM = { 412 TokenType.DASH: exp.Sub, 413 TokenType.PLUS: exp.Add, 414 TokenType.MOD: exp.Mod, 415 TokenType.COLLATE: exp.Collate, 416 } 417 418 FACTOR = { 419 TokenType.DIV: exp.IntDiv, 420 TokenType.LR_ARROW: exp.Distance, 421 TokenType.SLASH: exp.Div, 422 TokenType.STAR: exp.Mul, 423 } 424 425 TIMES = { 426 TokenType.TIME, 427 TokenType.TIMETZ, 428 } 429 430 TIMESTAMPS = { 431 TokenType.TIMESTAMP, 432 TokenType.TIMESTAMPTZ, 433 TokenType.TIMESTAMPLTZ, 434 *TIMES, 435 } 436 437 SET_OPERATIONS = { 438 TokenType.UNION, 439 TokenType.INTERSECT, 440 TokenType.EXCEPT, 441 } 442 443 JOIN_METHODS = { 444 TokenType.NATURAL, 445 TokenType.ASOF, 446 } 447 448 JOIN_SIDES = { 449 TokenType.LEFT, 450 TokenType.RIGHT, 451 TokenType.FULL, 452 } 453 454 JOIN_KINDS = { 455 TokenType.INNER, 456 TokenType.OUTER, 457 TokenType.CROSS, 458 TokenType.SEMI, 459 TokenType.ANTI, 460 } 461 462 JOIN_HINTS: t.Set[str] = set() 463 464 LAMBDAS = { 465 TokenType.ARROW: lambda self, expressions: self.expression( 466 exp.Lambda, 467 this=self._replace_lambda( 468 self._parse_conjunction(), 469 {node.name for node in expressions}, 470 ), 471 expressions=expressions, 472 ), 473 TokenType.FARROW: lambda self, expressions: self.expression( 474 exp.Kwarg, 475 this=exp.var(expressions[0].name), 476 expression=self._parse_conjunction(), 477 ), 478 } 479 480 COLUMN_OPERATORS = { 481 TokenType.DOT: None, 482 TokenType.DCOLON: lambda self, this, to: self.expression( 483 exp.Cast if self.STRICT_CAST else exp.TryCast, 484 this=this, 485 to=to, 486 ), 487 TokenType.ARROW: lambda self, this, path: self.expression( 488 exp.JSONExtract, 489 this=this, 490 expression=path, 491 ), 492 TokenType.DARROW: lambda self, this, path: self.expression( 493 exp.JSONExtractScalar, 494 
this=this, 495 expression=path, 496 ), 497 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 498 exp.JSONBExtract, 499 this=this, 500 expression=path, 501 ), 502 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 503 exp.JSONBExtractScalar, 504 this=this, 505 expression=path, 506 ), 507 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 508 exp.JSONBContains, 509 this=this, 510 expression=key, 511 ), 512 } 513 514 EXPRESSION_PARSERS = { 515 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 516 exp.Column: lambda self: self._parse_column(), 517 exp.Condition: lambda self: self._parse_conjunction(), 518 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 519 exp.Expression: lambda self: self._parse_statement(), 520 exp.From: lambda self: self._parse_from(), 521 exp.Group: lambda self: self._parse_group(), 522 exp.Having: lambda self: self._parse_having(), 523 exp.Identifier: lambda self: self._parse_id_var(), 524 exp.Join: lambda self: self._parse_join(), 525 exp.Lambda: lambda self: self._parse_lambda(), 526 exp.Lateral: lambda self: self._parse_lateral(), 527 exp.Limit: lambda self: self._parse_limit(), 528 exp.Offset: lambda self: self._parse_offset(), 529 exp.Order: lambda self: self._parse_order(), 530 exp.Ordered: lambda self: self._parse_ordered(), 531 exp.Properties: lambda self: self._parse_properties(), 532 exp.Qualify: lambda self: self._parse_qualify(), 533 exp.Returning: lambda self: self._parse_returning(), 534 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 535 exp.Table: lambda self: self._parse_table_parts(), 536 exp.TableAlias: lambda self: self._parse_table_alias(), 537 exp.Where: lambda self: self._parse_where(), 538 exp.Window: lambda self: self._parse_named_window(), 539 exp.With: lambda self: self._parse_with(), 540 "JOIN_TYPE": lambda self: self._parse_join_parts(), 541 } 542 543 STATEMENT_PARSERS = { 544 TokenType.ALTER: lambda self: 
self._parse_alter(), 545 TokenType.BEGIN: lambda self: self._parse_transaction(), 546 TokenType.CACHE: lambda self: self._parse_cache(), 547 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 548 TokenType.COMMENT: lambda self: self._parse_comment(), 549 TokenType.CREATE: lambda self: self._parse_create(), 550 TokenType.DELETE: lambda self: self._parse_delete(), 551 TokenType.DESC: lambda self: self._parse_describe(), 552 TokenType.DESCRIBE: lambda self: self._parse_describe(), 553 TokenType.DROP: lambda self: self._parse_drop(), 554 TokenType.INSERT: lambda self: self._parse_insert(), 555 TokenType.KILL: lambda self: self._parse_kill(), 556 TokenType.LOAD: lambda self: self._parse_load(), 557 TokenType.MERGE: lambda self: self._parse_merge(), 558 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 559 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 560 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 561 TokenType.SET: lambda self: self._parse_set(), 562 TokenType.UNCACHE: lambda self: self._parse_uncache(), 563 TokenType.UPDATE: lambda self: self._parse_update(), 564 TokenType.USE: lambda self: self.expression( 565 exp.Use, 566 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 567 and exp.var(self._prev.text), 568 this=self._parse_table(schema=False), 569 ), 570 } 571 572 UNARY_PARSERS = { 573 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 574 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 575 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 576 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 577 } 578 579 PRIMARY_PARSERS = { 580 TokenType.STRING: lambda self, token: self.expression( 581 exp.Literal, this=token.text, is_string=True 582 ), 583 TokenType.NUMBER: lambda self, token: self.expression( 584 exp.Literal, 
this=token.text, is_string=False 585 ), 586 TokenType.STAR: lambda self, _: self.expression( 587 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 588 ), 589 TokenType.NULL: lambda self, _: self.expression(exp.Null), 590 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 591 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 592 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 593 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 594 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 595 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 596 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 597 exp.National, this=token.text 598 ), 599 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 600 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 601 exp.RawString, this=token.text 602 ), 603 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 604 } 605 606 PLACEHOLDER_PARSERS = { 607 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 608 TokenType.PARAMETER: lambda self: self._parse_parameter(), 609 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 610 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 611 else None, 612 } 613 614 RANGE_PARSERS = { 615 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 616 TokenType.GLOB: binary_range_parser(exp.Glob), 617 TokenType.ILIKE: binary_range_parser(exp.ILike), 618 TokenType.IN: lambda self, this: self._parse_in(this), 619 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 620 TokenType.IS: lambda self, this: self._parse_is(this), 621 TokenType.LIKE: binary_range_parser(exp.Like), 622 TokenType.OVERLAPS: 
binary_range_parser(exp.Overlaps), 623 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 624 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 625 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 626 } 627 628 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 629 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 630 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 631 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 632 "CHARACTER SET": lambda self: self._parse_character_set(), 633 "CHECKSUM": lambda self: self._parse_checksum(), 634 "CLUSTER BY": lambda self: self._parse_cluster(), 635 "CLUSTERED": lambda self: self._parse_clustered_by(), 636 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 637 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 638 "COPY": lambda self: self._parse_copy_property(), 639 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 640 "DEFINER": lambda self: self._parse_definer(), 641 "DETERMINISTIC": lambda self: self.expression( 642 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 643 ), 644 "DISTKEY": lambda self: self._parse_distkey(), 645 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 646 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 647 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 648 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 649 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 650 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 651 "FREESPACE": lambda self: self._parse_freespace(), 652 "HEAP": lambda self: self.expression(exp.HeapProperty), 653 "IMMUTABLE": lambda self: self.expression( 654 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 655 ), 
656 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 657 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 658 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 659 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 660 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 661 "LIKE": lambda self: self._parse_create_like(), 662 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 663 "LOCK": lambda self: self._parse_locking(), 664 "LOCKING": lambda self: self._parse_locking(), 665 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 666 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 667 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 668 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 669 "NO": lambda self: self._parse_no_property(), 670 "ON": lambda self: self._parse_on_property(), 671 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 672 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 673 "PARTITION BY": lambda self: self._parse_partitioned_by(), 674 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 675 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 676 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 677 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 678 "REMOTE": lambda self: self._parse_remote_with_connection(), 679 "RETURNS": lambda self: self._parse_returns(), 680 "ROW": lambda self: self._parse_row(), 681 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 682 "SAMPLE": lambda self: self.expression( 683 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 684 ), 685 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 686 "SETTINGS": lambda self: 
self.expression( 687 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 688 ), 689 "SORTKEY": lambda self: self._parse_sortkey(), 690 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 691 "STABLE": lambda self: self.expression( 692 exp.StabilityProperty, this=exp.Literal.string("STABLE") 693 ), 694 "STORED": lambda self: self._parse_stored(), 695 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 696 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 697 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 698 "TO": lambda self: self._parse_to_table(), 699 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 700 "TRANSFORM": lambda self: self.expression( 701 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 702 ), 703 "TTL": lambda self: self._parse_ttl(), 704 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 705 "VOLATILE": lambda self: self._parse_volatile_property(), 706 "WITH": lambda self: self._parse_with_property(), 707 } 708 709 CONSTRAINT_PARSERS = { 710 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 711 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 712 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 713 "CHARACTER SET": lambda self: self.expression( 714 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 715 ), 716 "CHECK": lambda self: self.expression( 717 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 718 ), 719 "COLLATE": lambda self: self.expression( 720 exp.CollateColumnConstraint, this=self._parse_var() 721 ), 722 "COMMENT": lambda self: self.expression( 723 exp.CommentColumnConstraint, this=self._parse_string() 724 ), 725 "COMPRESS": lambda self: self._parse_compress(), 726 "CLUSTERED": lambda self: self.expression( 727 exp.ClusteredColumnConstraint, 
this=self._parse_wrapped_csv(self._parse_ordered) 728 ), 729 "NONCLUSTERED": lambda self: self.expression( 730 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 731 ), 732 "DEFAULT": lambda self: self.expression( 733 exp.DefaultColumnConstraint, this=self._parse_bitwise() 734 ), 735 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 736 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 737 "FORMAT": lambda self: self.expression( 738 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 739 ), 740 "GENERATED": lambda self: self._parse_generated_as_identity(), 741 "IDENTITY": lambda self: self._parse_auto_increment(), 742 "INLINE": lambda self: self._parse_inline(), 743 "LIKE": lambda self: self._parse_create_like(), 744 "NOT": lambda self: self._parse_not_constraint(), 745 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 746 "ON": lambda self: ( 747 self._match(TokenType.UPDATE) 748 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 749 ) 750 or self.expression(exp.OnProperty, this=self._parse_id_var()), 751 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 752 "PRIMARY KEY": lambda self: self._parse_primary_key(), 753 "REFERENCES": lambda self: self._parse_references(match=False), 754 "TITLE": lambda self: self.expression( 755 exp.TitleColumnConstraint, this=self._parse_var_or_string() 756 ), 757 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 758 "UNIQUE": lambda self: self._parse_unique(), 759 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 760 "WITH": lambda self: self.expression( 761 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 762 ), 763 } 764 765 ALTER_PARSERS = { 766 "ADD": lambda self: self._parse_alter_table_add(), 767 "ALTER": lambda self: 
self._parse_alter_table_alter(), 768 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 769 "DROP": lambda self: self._parse_alter_table_drop(), 770 "RENAME": lambda self: self._parse_alter_table_rename(), 771 } 772 773 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 774 775 NO_PAREN_FUNCTION_PARSERS = { 776 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 777 "CASE": lambda self: self._parse_case(), 778 "IF": lambda self: self._parse_if(), 779 "NEXT": lambda self: self._parse_next_value_for(), 780 } 781 782 INVALID_FUNC_NAME_TOKENS = { 783 TokenType.IDENTIFIER, 784 TokenType.STRING, 785 } 786 787 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 788 789 FUNCTION_PARSERS = { 790 "ANY_VALUE": lambda self: self._parse_any_value(), 791 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 792 "CONCAT": lambda self: self._parse_concat(), 793 "CONCAT_WS": lambda self: self._parse_concat_ws(), 794 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 795 "DECODE": lambda self: self._parse_decode(), 796 "EXTRACT": lambda self: self._parse_extract(), 797 "JSON_OBJECT": lambda self: self._parse_json_object(), 798 "JSON_TABLE": lambda self: self._parse_json_table(), 799 "LOG": lambda self: self._parse_logarithm(), 800 "MATCH": lambda self: self._parse_match_against(), 801 "OPENJSON": lambda self: self._parse_open_json(), 802 "POSITION": lambda self: self._parse_position(), 803 "PREDICT": lambda self: self._parse_predict(), 804 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 805 "STRING_AGG": lambda self: self._parse_string_agg(), 806 "SUBSTRING": lambda self: self._parse_substring(), 807 "TRIM": lambda self: self._parse_trim(), 808 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 809 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 810 } 811 812 QUERY_MODIFIER_PARSERS = { 813 TokenType.MATCH_RECOGNIZE: lambda self: ("match", 
self._parse_match_recognize()), 814 TokenType.WHERE: lambda self: ("where", self._parse_where()), 815 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 816 TokenType.HAVING: lambda self: ("having", self._parse_having()), 817 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 818 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 819 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 820 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 821 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 822 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 823 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 824 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 825 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 826 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 827 TokenType.CLUSTER_BY: lambda self: ( 828 "cluster", 829 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 830 ), 831 TokenType.DISTRIBUTE_BY: lambda self: ( 832 "distribute", 833 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 834 ), 835 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 836 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 837 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 838 } 839 840 SET_PARSERS = { 841 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 842 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 843 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 844 "TRANSACTION": lambda self: self._parse_set_transaction(), 845 } 846 847 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 848 849 TYPE_LITERAL_PARSERS = { 850 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 851 } 852 853 MODIFIABLES 
= (exp.Subquery, exp.Subqueryable, exp.Table) 854 855 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 856 857 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 858 859 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 860 TRANSACTION_CHARACTERISTICS = { 861 "ISOLATION LEVEL REPEATABLE READ", 862 "ISOLATION LEVEL READ COMMITTED", 863 "ISOLATION LEVEL READ UNCOMMITTED", 864 "ISOLATION LEVEL SERIALIZABLE", 865 "READ WRITE", 866 "READ ONLY", 867 } 868 869 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 870 871 CLONE_KEYWORDS = {"CLONE", "COPY"} 872 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 873 874 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 875 876 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 877 878 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 879 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 880 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 881 882 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 883 884 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 885 886 DISTINCT_TOKENS = {TokenType.DISTINCT} 887 888 NULL_TOKENS = {TokenType.NULL} 889 890 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 891 892 STRICT_CAST = True 893 894 # A NULL arg in CONCAT yields NULL by default 895 CONCAT_NULL_OUTPUTS_STRING = False 896 897 PREFIXED_PIVOT_COLUMNS = False 898 IDENTIFY_PIVOT_STRINGS = False 899 900 LOG_BASE_FIRST = True 901 LOG_DEFAULTS_TO_LN = False 902 903 # Whether or not ADD is present for each column added by ALTER TABLE 904 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 905 906 # Whether or not the table sample clause expects CSV syntax 907 TABLESAMPLE_CSV = False 908 909 # Whether or not the SET command needs a delimiter (e.g. 
"=") for assignments 910 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 911 912 # Whether the TRIM function expects the characters to trim as its first argument 913 TRIM_PATTERN_FIRST = False 914 915 __slots__ = ( 916 "error_level", 917 "error_message_context", 918 "max_errors", 919 "sql", 920 "errors", 921 "_tokens", 922 "_index", 923 "_curr", 924 "_next", 925 "_prev", 926 "_prev_comments", 927 "_tokenizer", 928 ) 929 930 # Autofilled 931 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 932 INDEX_OFFSET: int = 0 933 UNNEST_COLUMN_ONLY: bool = False 934 ALIAS_POST_TABLESAMPLE: bool = False 935 STRICT_STRING_CONCAT = False 936 SUPPORTS_USER_DEFINED_TYPES = True 937 NORMALIZE_FUNCTIONS = "upper" 938 NULL_ORDERING: str = "nulls_are_small" 939 SHOW_TRIE: t.Dict = {} 940 SET_TRIE: t.Dict = {} 941 FORMAT_MAPPING: t.Dict[str, str] = {} 942 FORMAT_TRIE: t.Dict = {} 943 TIME_MAPPING: t.Dict[str, str] = {} 944 TIME_TRIE: t.Dict = {} 945 946 def __init__( 947 self, 948 error_level: t.Optional[ErrorLevel] = None, 949 error_message_context: int = 100, 950 max_errors: int = 3, 951 ): 952 self.error_level = error_level or ErrorLevel.IMMEDIATE 953 self.error_message_context = error_message_context 954 self.max_errors = max_errors 955 self._tokenizer = self.TOKENIZER_CLASS() 956 self.reset() 957 958 def reset(self): 959 self.sql = "" 960 self.errors = [] 961 self._tokens = [] 962 self._index = 0 963 self._curr = None 964 self._next = None 965 self._prev = None 966 self._prev_comments = None 967 968 def parse( 969 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 970 ) -> t.List[t.Optional[exp.Expression]]: 971 """ 972 Parses a list of tokens and returns a list of syntax trees, one tree 973 per parsed SQL statement. 974 975 Args: 976 raw_tokens: The list of tokens. 977 sql: The original SQL string, used to produce helpful debug messages. 978 979 Returns: 980 The list of the produced syntax trees. 
981 """ 982 return self._parse( 983 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 984 ) 985 986 def parse_into( 987 self, 988 expression_types: exp.IntoType, 989 raw_tokens: t.List[Token], 990 sql: t.Optional[str] = None, 991 ) -> t.List[t.Optional[exp.Expression]]: 992 """ 993 Parses a list of tokens into a given Expression type. If a collection of Expression 994 types is given instead, this method will try to parse the token list into each one 995 of them, stopping at the first for which the parsing succeeds. 996 997 Args: 998 expression_types: The expression type(s) to try and parse the token list into. 999 raw_tokens: The list of tokens. 1000 sql: The original SQL string, used to produce helpful debug messages. 1001 1002 Returns: 1003 The target Expression. 1004 """ 1005 errors = [] 1006 for expression_type in ensure_list(expression_types): 1007 parser = self.EXPRESSION_PARSERS.get(expression_type) 1008 if not parser: 1009 raise TypeError(f"No parser registered for {expression_type}") 1010 1011 try: 1012 return self._parse(parser, raw_tokens, sql) 1013 except ParseError as e: 1014 e.errors[0]["into_expression"] = expression_type 1015 errors.append(e) 1016 1017 raise ParseError( 1018 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1019 errors=merge_errors(errors), 1020 ) from errors[-1] 1021 1022 def _parse( 1023 self, 1024 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1025 raw_tokens: t.List[Token], 1026 sql: t.Optional[str] = None, 1027 ) -> t.List[t.Optional[exp.Expression]]: 1028 self.reset() 1029 self.sql = sql or "" 1030 1031 total = len(raw_tokens) 1032 chunks: t.List[t.List[Token]] = [[]] 1033 1034 for i, token in enumerate(raw_tokens): 1035 if token.token_type == TokenType.SEMICOLON: 1036 if i < total - 1: 1037 chunks.append([]) 1038 else: 1039 chunks[-1].append(token) 1040 1041 expressions = [] 1042 1043 for tokens in chunks: 1044 self._index = -1 1045 self._tokens = tokens 1046 
self._advance() 1047 1048 expressions.append(parse_method(self)) 1049 1050 if self._index < len(self._tokens): 1051 self.raise_error("Invalid expression / Unexpected token") 1052 1053 self.check_errors() 1054 1055 return expressions 1056 1057 def check_errors(self) -> None: 1058 """Logs or raises any found errors, depending on the chosen error level setting.""" 1059 if self.error_level == ErrorLevel.WARN: 1060 for error in self.errors: 1061 logger.error(str(error)) 1062 elif self.error_level == ErrorLevel.RAISE and self.errors: 1063 raise ParseError( 1064 concat_messages(self.errors, self.max_errors), 1065 errors=merge_errors(self.errors), 1066 ) 1067 1068 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1069 """ 1070 Appends an error in the list of recorded errors or raises it, depending on the chosen 1071 error level setting. 1072 """ 1073 token = token or self._curr or self._prev or Token.string("") 1074 start = token.start 1075 end = token.end + 1 1076 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1077 highlight = self.sql[start:end] 1078 end_context = self.sql[end : end + self.error_message_context] 1079 1080 error = ParseError.new( 1081 f"{message}. Line {token.line}, Col: {token.col}.\n" 1082 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1083 description=message, 1084 line=token.line, 1085 col=token.col, 1086 start_context=start_context, 1087 highlight=highlight, 1088 end_context=end_context, 1089 ) 1090 1091 if self.error_level == ErrorLevel.IMMEDIATE: 1092 raise error 1093 1094 self.errors.append(error) 1095 1096 def expression( 1097 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1098 ) -> E: 1099 """ 1100 Creates a new, validated Expression. 1101 1102 Args: 1103 exp_class: The expression class to instantiate. 1104 comments: An optional list of comments to attach to the expression. 
1105 kwargs: The arguments to set for the expression along with their respective values. 1106 1107 Returns: 1108 The target expression. 1109 """ 1110 instance = exp_class(**kwargs) 1111 instance.add_comments(comments) if comments else self._add_comments(instance) 1112 return self.validate_expression(instance) 1113 1114 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1115 if expression and self._prev_comments: 1116 expression.add_comments(self._prev_comments) 1117 self._prev_comments = None 1118 1119 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1120 """ 1121 Validates an Expression, making sure that all its mandatory arguments are set. 1122 1123 Args: 1124 expression: The expression to validate. 1125 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1126 1127 Returns: 1128 The validated expression. 1129 """ 1130 if self.error_level != ErrorLevel.IGNORE: 1131 for error_message in expression.error_messages(args): 1132 self.raise_error(error_message) 1133 1134 return expression 1135 1136 def _find_sql(self, start: Token, end: Token) -> str: 1137 return self.sql[start.start : end.end + 1] 1138 1139 def _advance(self, times: int = 1) -> None: 1140 self._index += times 1141 self._curr = seq_get(self._tokens, self._index) 1142 self._next = seq_get(self._tokens, self._index + 1) 1143 1144 if self._index > 0: 1145 self._prev = self._tokens[self._index - 1] 1146 self._prev_comments = self._prev.comments 1147 else: 1148 self._prev = None 1149 self._prev_comments = None 1150 1151 def _retreat(self, index: int) -> None: 1152 if index != self._index: 1153 self._advance(index - self._index) 1154 1155 def _parse_command(self) -> exp.Command: 1156 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1157 1158 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1159 start = self._prev 1160 exists = self._parse_exists() if 
allow_exists else None 1161 1162 self._match(TokenType.ON) 1163 1164 kind = self._match_set(self.CREATABLES) and self._prev 1165 if not kind: 1166 return self._parse_as_command(start) 1167 1168 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1169 this = self._parse_user_defined_function(kind=kind.token_type) 1170 elif kind.token_type == TokenType.TABLE: 1171 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1172 elif kind.token_type == TokenType.COLUMN: 1173 this = self._parse_column() 1174 else: 1175 this = self._parse_id_var() 1176 1177 self._match(TokenType.IS) 1178 1179 return self.expression( 1180 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1181 ) 1182 1183 def _parse_to_table( 1184 self, 1185 ) -> exp.ToTableProperty: 1186 table = self._parse_table_parts(schema=True) 1187 return self.expression(exp.ToTableProperty, this=table) 1188 1189 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1190 def _parse_ttl(self) -> exp.Expression: 1191 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1192 this = self._parse_bitwise() 1193 1194 if self._match_text_seq("DELETE"): 1195 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1196 if self._match_text_seq("RECOMPRESS"): 1197 return self.expression( 1198 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1199 ) 1200 if self._match_text_seq("TO", "DISK"): 1201 return self.expression( 1202 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1203 ) 1204 if self._match_text_seq("TO", "VOLUME"): 1205 return self.expression( 1206 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1207 ) 1208 1209 return this 1210 1211 expressions = self._parse_csv(_parse_ttl_action) 1212 where = self._parse_where() 1213 group = self._parse_group() 1214 1215 aggregates = None 1216 if group and self._match(TokenType.SET): 1217 aggregates = 
self._parse_csv(self._parse_set_item) 1218 1219 return self.expression( 1220 exp.MergeTreeTTL, 1221 expressions=expressions, 1222 where=where, 1223 group=group, 1224 aggregates=aggregates, 1225 ) 1226 1227 def _parse_statement(self) -> t.Optional[exp.Expression]: 1228 if self._curr is None: 1229 return None 1230 1231 if self._match_set(self.STATEMENT_PARSERS): 1232 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1233 1234 if self._match_set(Tokenizer.COMMANDS): 1235 return self._parse_command() 1236 1237 expression = self._parse_expression() 1238 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1239 return self._parse_query_modifiers(expression) 1240 1241 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1242 start = self._prev 1243 temporary = self._match(TokenType.TEMPORARY) 1244 materialized = self._match_text_seq("MATERIALIZED") 1245 1246 kind = self._match_set(self.CREATABLES) and self._prev.text 1247 if not kind: 1248 return self._parse_as_command(start) 1249 1250 return self.expression( 1251 exp.Drop, 1252 comments=start.comments, 1253 exists=exists or self._parse_exists(), 1254 this=self._parse_table(schema=True), 1255 kind=kind, 1256 temporary=temporary, 1257 materialized=materialized, 1258 cascade=self._match_text_seq("CASCADE"), 1259 constraints=self._match_text_seq("CONSTRAINTS"), 1260 purge=self._match_text_seq("PURGE"), 1261 ) 1262 1263 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1264 return ( 1265 self._match_text_seq("IF") 1266 and (not not_ or self._match(TokenType.NOT)) 1267 and self._match(TokenType.EXISTS) 1268 ) 1269 1270 def _parse_create(self) -> exp.Create | exp.Command: 1271 # Note: this can't be None because we've matched a statement parser 1272 start = self._prev 1273 comments = self._prev_comments 1274 1275 replace = start.text.upper() == "REPLACE" or self._match_pair( 1276 TokenType.OR, TokenType.REPLACE 1277 ) 1278 unique = 
self._match(TokenType.UNIQUE) 1279 1280 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1281 self._advance() 1282 1283 properties = None 1284 create_token = self._match_set(self.CREATABLES) and self._prev 1285 1286 if not create_token: 1287 # exp.Properties.Location.POST_CREATE 1288 properties = self._parse_properties() 1289 create_token = self._match_set(self.CREATABLES) and self._prev 1290 1291 if not properties or not create_token: 1292 return self._parse_as_command(start) 1293 1294 exists = self._parse_exists(not_=True) 1295 this = None 1296 expression: t.Optional[exp.Expression] = None 1297 indexes = None 1298 no_schema_binding = None 1299 begin = None 1300 end = None 1301 clone = None 1302 1303 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1304 nonlocal properties 1305 if properties and temp_props: 1306 properties.expressions.extend(temp_props.expressions) 1307 elif temp_props: 1308 properties = temp_props 1309 1310 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1311 this = self._parse_user_defined_function(kind=create_token.token_type) 1312 1313 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1314 extend_props(self._parse_properties()) 1315 1316 self._match(TokenType.ALIAS) 1317 1318 if self._match(TokenType.COMMAND): 1319 expression = self._parse_as_command(self._prev) 1320 else: 1321 begin = self._match(TokenType.BEGIN) 1322 return_ = self._match_text_seq("RETURN") 1323 1324 if self._match(TokenType.STRING, advance=False): 1325 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1326 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1327 expression = self._parse_string() 1328 extend_props(self._parse_properties()) 1329 else: 1330 expression = self._parse_statement() 1331 1332 end = self._match_text_seq("END") 1333 1334 if return_: 1335 expression = 
self.expression(exp.Return, this=expression) 1336 elif create_token.token_type == TokenType.INDEX: 1337 this = self._parse_index(index=self._parse_id_var()) 1338 elif create_token.token_type in self.DB_CREATABLES: 1339 table_parts = self._parse_table_parts(schema=True) 1340 1341 # exp.Properties.Location.POST_NAME 1342 self._match(TokenType.COMMA) 1343 extend_props(self._parse_properties(before=True)) 1344 1345 this = self._parse_schema(this=table_parts) 1346 1347 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1348 extend_props(self._parse_properties()) 1349 1350 self._match(TokenType.ALIAS) 1351 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1352 # exp.Properties.Location.POST_ALIAS 1353 extend_props(self._parse_properties()) 1354 1355 expression = self._parse_ddl_select() 1356 1357 if create_token.token_type == TokenType.TABLE: 1358 # exp.Properties.Location.POST_EXPRESSION 1359 extend_props(self._parse_properties()) 1360 1361 indexes = [] 1362 while True: 1363 index = self._parse_index() 1364 1365 # exp.Properties.Location.POST_INDEX 1366 extend_props(self._parse_properties()) 1367 1368 if not index: 1369 break 1370 else: 1371 self._match(TokenType.COMMA) 1372 indexes.append(index) 1373 elif create_token.token_type == TokenType.VIEW: 1374 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1375 no_schema_binding = True 1376 1377 shallow = self._match_text_seq("SHALLOW") 1378 1379 if self._match_texts(self.CLONE_KEYWORDS): 1380 copy = self._prev.text.lower() == "copy" 1381 clone = self._parse_table(schema=True) 1382 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1383 clone_kind = ( 1384 self._match(TokenType.L_PAREN) 1385 and self._match_texts(self.CLONE_KINDS) 1386 and self._prev.text.upper() 1387 ) 1388 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1389 self._match(TokenType.R_PAREN) 1390 clone = self.expression( 1391 exp.Clone, 1392 this=clone, 1393 when=when, 1394 
kind=clone_kind, 1395 shallow=shallow, 1396 expression=clone_expression, 1397 copy=copy, 1398 ) 1399 1400 return self.expression( 1401 exp.Create, 1402 comments=comments, 1403 this=this, 1404 kind=create_token.text, 1405 replace=replace, 1406 unique=unique, 1407 expression=expression, 1408 exists=exists, 1409 properties=properties, 1410 indexes=indexes, 1411 no_schema_binding=no_schema_binding, 1412 begin=begin, 1413 end=end, 1414 clone=clone, 1415 ) 1416 1417 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1418 # only used for teradata currently 1419 self._match(TokenType.COMMA) 1420 1421 kwargs = { 1422 "no": self._match_text_seq("NO"), 1423 "dual": self._match_text_seq("DUAL"), 1424 "before": self._match_text_seq("BEFORE"), 1425 "default": self._match_text_seq("DEFAULT"), 1426 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1427 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1428 "after": self._match_text_seq("AFTER"), 1429 "minimum": self._match_texts(("MIN", "MINIMUM")), 1430 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1431 } 1432 1433 if self._match_texts(self.PROPERTY_PARSERS): 1434 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1435 try: 1436 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1437 except TypeError: 1438 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1439 1440 return None 1441 1442 def _parse_property(self) -> t.Optional[exp.Expression]: 1443 if self._match_texts(self.PROPERTY_PARSERS): 1444 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1445 1446 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1447 return self._parse_character_set(default=True) 1448 1449 if self._match_text_seq("COMPOUND", "SORTKEY"): 1450 return self._parse_sortkey(compound=True) 1451 1452 if self._match_text_seq("SQL", "SECURITY"): 1453 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1454 1455 index = self._index 1456 key = 
self._parse_column() 1457 1458 if not self._match(TokenType.EQ): 1459 self._retreat(index) 1460 return None 1461 1462 return self.expression( 1463 exp.Property, 1464 this=key.to_dot() if isinstance(key, exp.Column) else key, 1465 value=self._parse_column() or self._parse_var(any_token=True), 1466 ) 1467 1468 def _parse_stored(self) -> exp.FileFormatProperty: 1469 self._match(TokenType.ALIAS) 1470 1471 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1472 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1473 1474 return self.expression( 1475 exp.FileFormatProperty, 1476 this=self.expression( 1477 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1478 ) 1479 if input_format or output_format 1480 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1481 ) 1482 1483 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1484 self._match(TokenType.EQ) 1485 self._match(TokenType.ALIAS) 1486 return self.expression(exp_class, this=self._parse_field()) 1487 1488 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1489 properties = [] 1490 while True: 1491 if before: 1492 prop = self._parse_property_before() 1493 else: 1494 prop = self._parse_property() 1495 1496 if not prop: 1497 break 1498 for p in ensure_list(prop): 1499 properties.append(p) 1500 1501 if properties: 1502 return self.expression(exp.Properties, expressions=properties) 1503 1504 return None 1505 1506 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1507 return self.expression( 1508 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1509 ) 1510 1511 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1512 if self._index >= 2: 1513 pre_volatile_token = self._tokens[self._index - 2] 1514 else: 1515 pre_volatile_token = None 1516 1517 if pre_volatile_token and 
pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1518 return exp.VolatileProperty() 1519 1520 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1521 1522 def _parse_with_property( 1523 self, 1524 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1525 if self._match(TokenType.L_PAREN, advance=False): 1526 return self._parse_wrapped_csv(self._parse_property) 1527 1528 if self._match_text_seq("JOURNAL"): 1529 return self._parse_withjournaltable() 1530 1531 if self._match_text_seq("DATA"): 1532 return self._parse_withdata(no=False) 1533 elif self._match_text_seq("NO", "DATA"): 1534 return self._parse_withdata(no=True) 1535 1536 if not self._next: 1537 return None 1538 1539 return self._parse_withisolatedloading() 1540 1541 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1542 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1543 self._match(TokenType.EQ) 1544 1545 user = self._parse_id_var() 1546 self._match(TokenType.PARAMETER) 1547 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1548 1549 if not user or not host: 1550 return None 1551 1552 return exp.DefinerProperty(this=f"{user}@{host}") 1553 1554 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1555 self._match(TokenType.TABLE) 1556 self._match(TokenType.EQ) 1557 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1558 1559 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1560 return self.expression(exp.LogProperty, no=no) 1561 1562 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1563 return self.expression(exp.JournalProperty, **kwargs) 1564 1565 def _parse_checksum(self) -> exp.ChecksumProperty: 1566 self._match(TokenType.EQ) 1567 1568 on = None 1569 if self._match(TokenType.ON): 1570 on = True 1571 elif self._match_text_seq("OFF"): 1572 on = False 1573 1574 return self.expression(exp.ChecksumProperty, on=on, 
default=self._match(TokenType.DEFAULT)) 1575 1576 def _parse_cluster(self) -> exp.Cluster: 1577 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1578 1579 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1580 self._match_text_seq("BY") 1581 1582 self._match_l_paren() 1583 expressions = self._parse_csv(self._parse_column) 1584 self._match_r_paren() 1585 1586 if self._match_text_seq("SORTED", "BY"): 1587 self._match_l_paren() 1588 sorted_by = self._parse_csv(self._parse_ordered) 1589 self._match_r_paren() 1590 else: 1591 sorted_by = None 1592 1593 self._match(TokenType.INTO) 1594 buckets = self._parse_number() 1595 self._match_text_seq("BUCKETS") 1596 1597 return self.expression( 1598 exp.ClusteredByProperty, 1599 expressions=expressions, 1600 sorted_by=sorted_by, 1601 buckets=buckets, 1602 ) 1603 1604 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1605 if not self._match_text_seq("GRANTS"): 1606 self._retreat(self._index - 1) 1607 return None 1608 1609 return self.expression(exp.CopyGrantsProperty) 1610 1611 def _parse_freespace(self) -> exp.FreespaceProperty: 1612 self._match(TokenType.EQ) 1613 return self.expression( 1614 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1615 ) 1616 1617 def _parse_mergeblockratio( 1618 self, no: bool = False, default: bool = False 1619 ) -> exp.MergeBlockRatioProperty: 1620 if self._match(TokenType.EQ): 1621 return self.expression( 1622 exp.MergeBlockRatioProperty, 1623 this=self._parse_number(), 1624 percent=self._match(TokenType.PERCENT), 1625 ) 1626 1627 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1628 1629 def _parse_datablocksize( 1630 self, 1631 default: t.Optional[bool] = None, 1632 minimum: t.Optional[bool] = None, 1633 maximum: t.Optional[bool] = None, 1634 ) -> exp.DataBlocksizeProperty: 1635 self._match(TokenType.EQ) 1636 size = self._parse_number() 1637 1638 units = None 1639 if 
self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1640 units = self._prev.text 1641 1642 return self.expression( 1643 exp.DataBlocksizeProperty, 1644 size=size, 1645 units=units, 1646 default=default, 1647 minimum=minimum, 1648 maximum=maximum, 1649 ) 1650 1651 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1652 self._match(TokenType.EQ) 1653 always = self._match_text_seq("ALWAYS") 1654 manual = self._match_text_seq("MANUAL") 1655 never = self._match_text_seq("NEVER") 1656 default = self._match_text_seq("DEFAULT") 1657 1658 autotemp = None 1659 if self._match_text_seq("AUTOTEMP"): 1660 autotemp = self._parse_schema() 1661 1662 return self.expression( 1663 exp.BlockCompressionProperty, 1664 always=always, 1665 manual=manual, 1666 never=never, 1667 default=default, 1668 autotemp=autotemp, 1669 ) 1670 1671 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1672 no = self._match_text_seq("NO") 1673 concurrent = self._match_text_seq("CONCURRENT") 1674 self._match_text_seq("ISOLATED", "LOADING") 1675 for_all = self._match_text_seq("FOR", "ALL") 1676 for_insert = self._match_text_seq("FOR", "INSERT") 1677 for_none = self._match_text_seq("FOR", "NONE") 1678 return self.expression( 1679 exp.IsolatedLoadingProperty, 1680 no=no, 1681 concurrent=concurrent, 1682 for_all=for_all, 1683 for_insert=for_insert, 1684 for_none=for_none, 1685 ) 1686 1687 def _parse_locking(self) -> exp.LockingProperty: 1688 if self._match(TokenType.TABLE): 1689 kind = "TABLE" 1690 elif self._match(TokenType.VIEW): 1691 kind = "VIEW" 1692 elif self._match(TokenType.ROW): 1693 kind = "ROW" 1694 elif self._match_text_seq("DATABASE"): 1695 kind = "DATABASE" 1696 else: 1697 kind = None 1698 1699 if kind in ("DATABASE", "TABLE", "VIEW"): 1700 this = self._parse_table_parts() 1701 else: 1702 this = None 1703 1704 if self._match(TokenType.FOR): 1705 for_or_in = "FOR" 1706 elif self._match(TokenType.IN): 1707 for_or_in = "IN" 1708 else: 1709 for_or_in = None 1710 1711 
if self._match_text_seq("ACCESS"): 1712 lock_type = "ACCESS" 1713 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1714 lock_type = "EXCLUSIVE" 1715 elif self._match_text_seq("SHARE"): 1716 lock_type = "SHARE" 1717 elif self._match_text_seq("READ"): 1718 lock_type = "READ" 1719 elif self._match_text_seq("WRITE"): 1720 lock_type = "WRITE" 1721 elif self._match_text_seq("CHECKSUM"): 1722 lock_type = "CHECKSUM" 1723 else: 1724 lock_type = None 1725 1726 override = self._match_text_seq("OVERRIDE") 1727 1728 return self.expression( 1729 exp.LockingProperty, 1730 this=this, 1731 kind=kind, 1732 for_or_in=for_or_in, 1733 lock_type=lock_type, 1734 override=override, 1735 ) 1736 1737 def _parse_partition_by(self) -> t.List[exp.Expression]: 1738 if self._match(TokenType.PARTITION_BY): 1739 return self._parse_csv(self._parse_conjunction) 1740 return [] 1741 1742 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1743 self._match(TokenType.EQ) 1744 return self.expression( 1745 exp.PartitionedByProperty, 1746 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1747 ) 1748 1749 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1750 if self._match_text_seq("AND", "STATISTICS"): 1751 statistics = True 1752 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1753 statistics = False 1754 else: 1755 statistics = None 1756 1757 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1758 1759 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1760 if self._match_text_seq("PRIMARY", "INDEX"): 1761 return exp.NoPrimaryIndexProperty() 1762 return None 1763 1764 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1765 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1766 return exp.OnCommitProperty() 1767 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1768 return exp.OnCommitProperty(delete=True) 1769 return self.expression(exp.OnProperty, 
this=self._parse_schema(self._parse_id_var())) 1770 1771 def _parse_distkey(self) -> exp.DistKeyProperty: 1772 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1773 1774 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1775 table = self._parse_table(schema=True) 1776 1777 options = [] 1778 while self._match_texts(("INCLUDING", "EXCLUDING")): 1779 this = self._prev.text.upper() 1780 1781 id_var = self._parse_id_var() 1782 if not id_var: 1783 return None 1784 1785 options.append( 1786 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1787 ) 1788 1789 return self.expression(exp.LikeProperty, this=table, expressions=options) 1790 1791 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1792 return self.expression( 1793 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1794 ) 1795 1796 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1797 self._match(TokenType.EQ) 1798 return self.expression( 1799 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1800 ) 1801 1802 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1803 self._match_text_seq("WITH", "CONNECTION") 1804 return self.expression( 1805 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1806 ) 1807 1808 def _parse_returns(self) -> exp.ReturnsProperty: 1809 value: t.Optional[exp.Expression] 1810 is_table = self._match(TokenType.TABLE) 1811 1812 if is_table: 1813 if self._match(TokenType.LT): 1814 value = self.expression( 1815 exp.Schema, 1816 this="TABLE", 1817 expressions=self._parse_csv(self._parse_struct_types), 1818 ) 1819 if not self._match(TokenType.GT): 1820 self.raise_error("Expecting >") 1821 else: 1822 value = self._parse_schema(exp.var("TABLE")) 1823 else: 1824 value = self._parse_types() 1825 1826 return self.expression(exp.ReturnsProperty, this=value, 
is_table=is_table) 1827 1828 def _parse_describe(self) -> exp.Describe: 1829 kind = self._match_set(self.CREATABLES) and self._prev.text 1830 this = self._parse_table(schema=True) 1831 properties = self._parse_properties() 1832 expressions = properties.expressions if properties else None 1833 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1834 1835 def _parse_insert(self) -> exp.Insert: 1836 comments = ensure_list(self._prev_comments) 1837 overwrite = self._match(TokenType.OVERWRITE) 1838 ignore = self._match(TokenType.IGNORE) 1839 local = self._match_text_seq("LOCAL") 1840 alternative = None 1841 1842 if self._match_text_seq("DIRECTORY"): 1843 this: t.Optional[exp.Expression] = self.expression( 1844 exp.Directory, 1845 this=self._parse_var_or_string(), 1846 local=local, 1847 row_format=self._parse_row_format(match_row=True), 1848 ) 1849 else: 1850 if self._match(TokenType.OR): 1851 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1852 1853 self._match(TokenType.INTO) 1854 comments += ensure_list(self._prev_comments) 1855 self._match(TokenType.TABLE) 1856 this = self._parse_table(schema=True) 1857 1858 returning = self._parse_returning() 1859 1860 return self.expression( 1861 exp.Insert, 1862 comments=comments, 1863 this=this, 1864 by_name=self._match_text_seq("BY", "NAME"), 1865 exists=self._parse_exists(), 1866 partition=self._parse_partition(), 1867 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1868 and self._parse_conjunction(), 1869 expression=self._parse_ddl_select(), 1870 conflict=self._parse_on_conflict(), 1871 returning=returning or self._parse_returning(), 1872 overwrite=overwrite, 1873 alternative=alternative, 1874 ignore=ignore, 1875 ) 1876 1877 def _parse_kill(self) -> exp.Kill: 1878 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1879 1880 return self.expression( 1881 exp.Kill, 1882 this=self._parse_primary(), 1883 kind=kind, 1884 ) 
1885 1886 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1887 conflict = self._match_text_seq("ON", "CONFLICT") 1888 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1889 1890 if not conflict and not duplicate: 1891 return None 1892 1893 nothing = None 1894 expressions = None 1895 key = None 1896 constraint = None 1897 1898 if conflict: 1899 if self._match_text_seq("ON", "CONSTRAINT"): 1900 constraint = self._parse_id_var() 1901 else: 1902 key = self._parse_csv(self._parse_value) 1903 1904 self._match_text_seq("DO") 1905 if self._match_text_seq("NOTHING"): 1906 nothing = True 1907 else: 1908 self._match(TokenType.UPDATE) 1909 self._match(TokenType.SET) 1910 expressions = self._parse_csv(self._parse_equality) 1911 1912 return self.expression( 1913 exp.OnConflict, 1914 duplicate=duplicate, 1915 expressions=expressions, 1916 nothing=nothing, 1917 key=key, 1918 constraint=constraint, 1919 ) 1920 1921 def _parse_returning(self) -> t.Optional[exp.Returning]: 1922 if not self._match(TokenType.RETURNING): 1923 return None 1924 return self.expression( 1925 exp.Returning, 1926 expressions=self._parse_csv(self._parse_expression), 1927 into=self._match(TokenType.INTO) and self._parse_table_part(), 1928 ) 1929 1930 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1931 if not self._match(TokenType.FORMAT): 1932 return None 1933 return self._parse_row_format() 1934 1935 def _parse_row_format( 1936 self, match_row: bool = False 1937 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1938 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1939 return None 1940 1941 if self._match_text_seq("SERDE"): 1942 this = self._parse_string() 1943 1944 serde_properties = None 1945 if self._match(TokenType.SERDE_PROPERTIES): 1946 serde_properties = self.expression( 1947 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1948 ) 1949 1950 return 
self.expression( 1951 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1952 ) 1953 1954 self._match_text_seq("DELIMITED") 1955 1956 kwargs = {} 1957 1958 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1959 kwargs["fields"] = self._parse_string() 1960 if self._match_text_seq("ESCAPED", "BY"): 1961 kwargs["escaped"] = self._parse_string() 1962 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1963 kwargs["collection_items"] = self._parse_string() 1964 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1965 kwargs["map_keys"] = self._parse_string() 1966 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1967 kwargs["lines"] = self._parse_string() 1968 if self._match_text_seq("NULL", "DEFINED", "AS"): 1969 kwargs["null"] = self._parse_string() 1970 1971 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1972 1973 def _parse_load(self) -> exp.LoadData | exp.Command: 1974 if self._match_text_seq("DATA"): 1975 local = self._match_text_seq("LOCAL") 1976 self._match_text_seq("INPATH") 1977 inpath = self._parse_string() 1978 overwrite = self._match(TokenType.OVERWRITE) 1979 self._match_pair(TokenType.INTO, TokenType.TABLE) 1980 1981 return self.expression( 1982 exp.LoadData, 1983 this=self._parse_table(schema=True), 1984 local=local, 1985 overwrite=overwrite, 1986 inpath=inpath, 1987 partition=self._parse_partition(), 1988 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1989 serde=self._match_text_seq("SERDE") and self._parse_string(), 1990 ) 1991 return self._parse_as_command(self._prev) 1992 1993 def _parse_delete(self) -> exp.Delete: 1994 # This handles MySQL's "Multiple-Table Syntax" 1995 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1996 tables = None 1997 comments = self._prev_comments 1998 if not self._match(TokenType.FROM, advance=False): 1999 tables = self._parse_csv(self._parse_table) or None 2000 2001 returning = self._parse_returning() 
2002 2003 return self.expression( 2004 exp.Delete, 2005 comments=comments, 2006 tables=tables, 2007 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2008 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2009 where=self._parse_where(), 2010 returning=returning or self._parse_returning(), 2011 limit=self._parse_limit(), 2012 ) 2013 2014 def _parse_update(self) -> exp.Update: 2015 comments = self._prev_comments 2016 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2017 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2018 returning = self._parse_returning() 2019 return self.expression( 2020 exp.Update, 2021 comments=comments, 2022 **{ # type: ignore 2023 "this": this, 2024 "expressions": expressions, 2025 "from": self._parse_from(joins=True), 2026 "where": self._parse_where(), 2027 "returning": returning or self._parse_returning(), 2028 "order": self._parse_order(), 2029 "limit": self._parse_limit(), 2030 }, 2031 ) 2032 2033 def _parse_uncache(self) -> exp.Uncache: 2034 if not self._match(TokenType.TABLE): 2035 self.raise_error("Expecting TABLE after UNCACHE") 2036 2037 return self.expression( 2038 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2039 ) 2040 2041 def _parse_cache(self) -> exp.Cache: 2042 lazy = self._match_text_seq("LAZY") 2043 self._match(TokenType.TABLE) 2044 table = self._parse_table(schema=True) 2045 2046 options = [] 2047 if self._match_text_seq("OPTIONS"): 2048 self._match_l_paren() 2049 k = self._parse_string() 2050 self._match(TokenType.EQ) 2051 v = self._parse_string() 2052 options = [k, v] 2053 self._match_r_paren() 2054 2055 self._match(TokenType.ALIAS) 2056 return self.expression( 2057 exp.Cache, 2058 this=table, 2059 lazy=lazy, 2060 options=options, 2061 expression=self._parse_select(nested=True), 2062 ) 2063 2064 def _parse_partition(self) -> t.Optional[exp.Partition]: 2065 if not 
self._match(TokenType.PARTITION): 2066 return None 2067 2068 return self.expression( 2069 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2070 ) 2071 2072 def _parse_value(self) -> exp.Tuple: 2073 if self._match(TokenType.L_PAREN): 2074 expressions = self._parse_csv(self._parse_conjunction) 2075 self._match_r_paren() 2076 return self.expression(exp.Tuple, expressions=expressions) 2077 2078 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2079 # https://prestodb.io/docs/current/sql/values.html 2080 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2081 2082 def _parse_projections(self) -> t.List[exp.Expression]: 2083 return self._parse_expressions() 2084 2085 def _parse_select( 2086 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2087 ) -> t.Optional[exp.Expression]: 2088 cte = self._parse_with() 2089 2090 if cte: 2091 this = self._parse_statement() 2092 2093 if not this: 2094 self.raise_error("Failed to parse any statement following CTE") 2095 return cte 2096 2097 if "with" in this.arg_types: 2098 this.set("with", cte) 2099 else: 2100 self.raise_error(f"{this.key} does not support CTE") 2101 this = cte 2102 2103 return this 2104 2105 # duckdb supports leading with FROM x 2106 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2107 2108 if self._match(TokenType.SELECT): 2109 comments = self._prev_comments 2110 2111 hint = self._parse_hint() 2112 all_ = self._match(TokenType.ALL) 2113 distinct = self._match_set(self.DISTINCT_TOKENS) 2114 2115 kind = ( 2116 self._match(TokenType.ALIAS) 2117 and self._match_texts(("STRUCT", "VALUE")) 2118 and self._prev.text 2119 ) 2120 2121 if distinct: 2122 distinct = self.expression( 2123 exp.Distinct, 2124 on=self._parse_value() if self._match(TokenType.ON) else None, 2125 ) 2126 2127 if all_ and distinct: 2128 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2129 2130 
limit = self._parse_limit(top=True) 2131 projections = self._parse_projections() 2132 2133 this = self.expression( 2134 exp.Select, 2135 kind=kind, 2136 hint=hint, 2137 distinct=distinct, 2138 expressions=projections, 2139 limit=limit, 2140 ) 2141 this.comments = comments 2142 2143 into = self._parse_into() 2144 if into: 2145 this.set("into", into) 2146 2147 if not from_: 2148 from_ = self._parse_from() 2149 2150 if from_: 2151 this.set("from", from_) 2152 2153 this = self._parse_query_modifiers(this) 2154 elif (table or nested) and self._match(TokenType.L_PAREN): 2155 if self._match(TokenType.PIVOT): 2156 this = self._parse_simplified_pivot() 2157 elif self._match(TokenType.FROM): 2158 this = exp.select("*").from_( 2159 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2160 ) 2161 else: 2162 this = self._parse_table() if table else self._parse_select(nested=True) 2163 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2164 2165 self._match_r_paren() 2166 2167 # We return early here so that the UNION isn't attached to the subquery by the 2168 # following call to _parse_set_operations, but instead becomes the parent node 2169 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2170 elif self._match(TokenType.VALUES): 2171 this = self.expression( 2172 exp.Values, 2173 expressions=self._parse_csv(self._parse_value), 2174 alias=self._parse_table_alias(), 2175 ) 2176 elif from_: 2177 this = exp.select("*").from_(from_.this, copy=False) 2178 else: 2179 this = None 2180 2181 return self._parse_set_operations(this) 2182 2183 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2184 if not skip_with_token and not self._match(TokenType.WITH): 2185 return None 2186 2187 comments = self._prev_comments 2188 recursive = self._match(TokenType.RECURSIVE) 2189 2190 expressions = [] 2191 while True: 2192 expressions.append(self._parse_cte()) 2193 2194 if not self._match(TokenType.COMMA) and not 
self._match(TokenType.WITH): 2195 break 2196 else: 2197 self._match(TokenType.WITH) 2198 2199 return self.expression( 2200 exp.With, comments=comments, expressions=expressions, recursive=recursive 2201 ) 2202 2203 def _parse_cte(self) -> exp.CTE: 2204 alias = self._parse_table_alias() 2205 if not alias or not alias.this: 2206 self.raise_error("Expected CTE to have alias") 2207 2208 self._match(TokenType.ALIAS) 2209 return self.expression( 2210 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2211 ) 2212 2213 def _parse_table_alias( 2214 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2215 ) -> t.Optional[exp.TableAlias]: 2216 any_token = self._match(TokenType.ALIAS) 2217 alias = ( 2218 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2219 or self._parse_string_as_identifier() 2220 ) 2221 2222 index = self._index 2223 if self._match(TokenType.L_PAREN): 2224 columns = self._parse_csv(self._parse_function_parameter) 2225 self._match_r_paren() if columns else self._retreat(index) 2226 else: 2227 columns = None 2228 2229 if not alias and not columns: 2230 return None 2231 2232 return self.expression(exp.TableAlias, this=alias, columns=columns) 2233 2234 def _parse_subquery( 2235 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2236 ) -> t.Optional[exp.Subquery]: 2237 if not this: 2238 return None 2239 2240 return self.expression( 2241 exp.Subquery, 2242 this=this, 2243 pivots=self._parse_pivots(), 2244 alias=self._parse_table_alias() if parse_alias else None, 2245 ) 2246 2247 def _parse_query_modifiers( 2248 self, this: t.Optional[exp.Expression] 2249 ) -> t.Optional[exp.Expression]: 2250 if isinstance(this, self.MODIFIABLES): 2251 for join in iter(self._parse_join, None): 2252 this.append("joins", join) 2253 for lateral in iter(self._parse_lateral, None): 2254 this.append("laterals", lateral) 2255 2256 while True: 2257 if self._match_set(self.QUERY_MODIFIER_PARSERS, 
advance=False): 2258 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2259 key, expression = parser(self) 2260 2261 if expression: 2262 this.set(key, expression) 2263 if key == "limit": 2264 offset = expression.args.pop("offset", None) 2265 if offset: 2266 this.set("offset", exp.Offset(expression=offset)) 2267 continue 2268 break 2269 return this 2270 2271 def _parse_hint(self) -> t.Optional[exp.Hint]: 2272 if self._match(TokenType.HINT): 2273 hints = [] 2274 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2275 hints.extend(hint) 2276 2277 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2278 self.raise_error("Expected */ after HINT") 2279 2280 return self.expression(exp.Hint, expressions=hints) 2281 2282 return None 2283 2284 def _parse_into(self) -> t.Optional[exp.Into]: 2285 if not self._match(TokenType.INTO): 2286 return None 2287 2288 temp = self._match(TokenType.TEMPORARY) 2289 unlogged = self._match_text_seq("UNLOGGED") 2290 self._match(TokenType.TABLE) 2291 2292 return self.expression( 2293 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2294 ) 2295 2296 def _parse_from( 2297 self, joins: bool = False, skip_from_token: bool = False 2298 ) -> t.Optional[exp.From]: 2299 if not skip_from_token and not self._match(TokenType.FROM): 2300 return None 2301 2302 return self.expression( 2303 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2304 ) 2305 2306 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2307 if not self._match(TokenType.MATCH_RECOGNIZE): 2308 return None 2309 2310 self._match_l_paren() 2311 2312 partition = self._parse_partition_by() 2313 order = self._parse_order() 2314 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2315 2316 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2317 rows = exp.var("ONE ROW PER MATCH") 2318 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2319 text = 
"ALL ROWS PER MATCH" 2320 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2321 text += f" SHOW EMPTY MATCHES" 2322 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2323 text += f" OMIT EMPTY MATCHES" 2324 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2325 text += f" WITH UNMATCHED ROWS" 2326 rows = exp.var(text) 2327 else: 2328 rows = None 2329 2330 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2331 text = "AFTER MATCH SKIP" 2332 if self._match_text_seq("PAST", "LAST", "ROW"): 2333 text += f" PAST LAST ROW" 2334 elif self._match_text_seq("TO", "NEXT", "ROW"): 2335 text += f" TO NEXT ROW" 2336 elif self._match_text_seq("TO", "FIRST"): 2337 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2338 elif self._match_text_seq("TO", "LAST"): 2339 text += f" TO LAST {self._advance_any().text}" # type: ignore 2340 after = exp.var(text) 2341 else: 2342 after = None 2343 2344 if self._match_text_seq("PATTERN"): 2345 self._match_l_paren() 2346 2347 if not self._curr: 2348 self.raise_error("Expecting )", self._curr) 2349 2350 paren = 1 2351 start = self._curr 2352 2353 while self._curr and paren > 0: 2354 if self._curr.token_type == TokenType.L_PAREN: 2355 paren += 1 2356 if self._curr.token_type == TokenType.R_PAREN: 2357 paren -= 1 2358 2359 end = self._prev 2360 self._advance() 2361 2362 if paren > 0: 2363 self.raise_error("Expecting )", self._curr) 2364 2365 pattern = exp.var(self._find_sql(start, end)) 2366 else: 2367 pattern = None 2368 2369 define = ( 2370 self._parse_csv( 2371 lambda: self.expression( 2372 exp.Alias, 2373 alias=self._parse_id_var(any_token=True), 2374 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2375 ) 2376 ) 2377 if self._match_text_seq("DEFINE") 2378 else None 2379 ) 2380 2381 self._match_r_paren() 2382 2383 return self.expression( 2384 exp.MatchRecognize, 2385 partition_by=partition, 2386 order=order, 2387 measures=measures, 2388 rows=rows, 2389 after=after, 2390 pattern=pattern, 2391 
define=define, 2392 alias=self._parse_table_alias(), 2393 ) 2394 2395 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2396 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2397 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2398 2399 if outer_apply or cross_apply: 2400 this = self._parse_select(table=True) 2401 view = None 2402 outer = not cross_apply 2403 elif self._match(TokenType.LATERAL): 2404 this = self._parse_select(table=True) 2405 view = self._match(TokenType.VIEW) 2406 outer = self._match(TokenType.OUTER) 2407 else: 2408 return None 2409 2410 if not this: 2411 this = ( 2412 self._parse_unnest() 2413 or self._parse_function() 2414 or self._parse_id_var(any_token=False) 2415 ) 2416 2417 while self._match(TokenType.DOT): 2418 this = exp.Dot( 2419 this=this, 2420 expression=self._parse_function() or self._parse_id_var(any_token=False), 2421 ) 2422 2423 if view: 2424 table = self._parse_id_var(any_token=False) 2425 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2426 table_alias: t.Optional[exp.TableAlias] = self.expression( 2427 exp.TableAlias, this=table, columns=columns 2428 ) 2429 elif isinstance(this, exp.Subquery) and this.alias: 2430 # Ensures parity between the Subquery's and the Lateral's "alias" args 2431 table_alias = this.args["alias"].copy() 2432 else: 2433 table_alias = self._parse_table_alias() 2434 2435 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2436 2437 def _parse_join_parts( 2438 self, 2439 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2440 return ( 2441 self._match_set(self.JOIN_METHODS) and self._prev, 2442 self._match_set(self.JOIN_SIDES) and self._prev, 2443 self._match_set(self.JOIN_KINDS) and self._prev, 2444 ) 2445 2446 def _parse_join( 2447 self, skip_join_token: bool = False, parse_bracket: bool = False 2448 ) -> t.Optional[exp.Join]: 2449 if self._match(TokenType.COMMA): 2450 return 
self.expression(exp.Join, this=self._parse_table()) 2451 2452 index = self._index 2453 method, side, kind = self._parse_join_parts() 2454 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2455 join = self._match(TokenType.JOIN) 2456 2457 if not skip_join_token and not join: 2458 self._retreat(index) 2459 kind = None 2460 method = None 2461 side = None 2462 2463 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2464 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2465 2466 if not skip_join_token and not join and not outer_apply and not cross_apply: 2467 return None 2468 2469 if outer_apply: 2470 side = Token(TokenType.LEFT, "LEFT") 2471 2472 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2473 2474 if method: 2475 kwargs["method"] = method.text 2476 if side: 2477 kwargs["side"] = side.text 2478 if kind: 2479 kwargs["kind"] = kind.text 2480 if hint: 2481 kwargs["hint"] = hint 2482 2483 if self._match(TokenType.ON): 2484 kwargs["on"] = self._parse_conjunction() 2485 elif self._match(TokenType.USING): 2486 kwargs["using"] = self._parse_wrapped_id_vars() 2487 elif not (kind and kind.token_type == TokenType.CROSS): 2488 index = self._index 2489 join = self._parse_join() 2490 2491 if join and self._match(TokenType.ON): 2492 kwargs["on"] = self._parse_conjunction() 2493 elif join and self._match(TokenType.USING): 2494 kwargs["using"] = self._parse_wrapped_id_vars() 2495 else: 2496 join = None 2497 self._retreat(index) 2498 2499 kwargs["this"].set("joins", [join] if join else None) 2500 2501 comments = [c for token in (method, side, kind) if token for c in token.comments] 2502 return self.expression(exp.Join, comments=comments, **kwargs) 2503 2504 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2505 this = self._parse_conjunction() 2506 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2507 return this 2508 2509 opclass = 
self._parse_var(any_token=True) 2510 if opclass: 2511 return self.expression(exp.Opclass, this=this, expression=opclass) 2512 2513 return this 2514 2515 def _parse_index( 2516 self, 2517 index: t.Optional[exp.Expression] = None, 2518 ) -> t.Optional[exp.Index]: 2519 if index: 2520 unique = None 2521 primary = None 2522 amp = None 2523 2524 self._match(TokenType.ON) 2525 self._match(TokenType.TABLE) # hive 2526 table = self._parse_table_parts(schema=True) 2527 else: 2528 unique = self._match(TokenType.UNIQUE) 2529 primary = self._match_text_seq("PRIMARY") 2530 amp = self._match_text_seq("AMP") 2531 2532 if not self._match(TokenType.INDEX): 2533 return None 2534 2535 index = self._parse_id_var() 2536 table = None 2537 2538 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2539 2540 if self._match(TokenType.L_PAREN, advance=False): 2541 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2542 else: 2543 columns = None 2544 2545 return self.expression( 2546 exp.Index, 2547 this=index, 2548 table=table, 2549 using=using, 2550 columns=columns, 2551 unique=unique, 2552 primary=primary, 2553 amp=amp, 2554 partition_by=self._parse_partition_by(), 2555 where=self._parse_where(), 2556 ) 2557 2558 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2559 hints: t.List[exp.Expression] = [] 2560 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2561 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2562 hints.append( 2563 self.expression( 2564 exp.WithTableHint, 2565 expressions=self._parse_csv( 2566 lambda: self._parse_function() or self._parse_var(any_token=True) 2567 ), 2568 ) 2569 ) 2570 self._match_r_paren() 2571 else: 2572 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2573 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2574 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2575 2576 self._match_texts({"INDEX", 
"KEY"}) 2577 if self._match(TokenType.FOR): 2578 hint.set("target", self._advance_any() and self._prev.text.upper()) 2579 2580 hint.set("expressions", self._parse_wrapped_id_vars()) 2581 hints.append(hint) 2582 2583 return hints or None 2584 2585 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2586 return ( 2587 (not schema and self._parse_function(optional_parens=False)) 2588 or self._parse_id_var(any_token=False) 2589 or self._parse_string_as_identifier() 2590 or self._parse_placeholder() 2591 ) 2592 2593 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2594 catalog = None 2595 db = None 2596 table = self._parse_table_part(schema=schema) 2597 2598 while self._match(TokenType.DOT): 2599 if catalog: 2600 # This allows nesting the table in arbitrarily many dot expressions if needed 2601 table = self.expression( 2602 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2603 ) 2604 else: 2605 catalog = db 2606 db = table 2607 table = self._parse_table_part(schema=schema) 2608 2609 if not table: 2610 self.raise_error(f"Expected table name but got {self._curr}") 2611 2612 return self.expression( 2613 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2614 ) 2615 2616 def _parse_table( 2617 self, 2618 schema: bool = False, 2619 joins: bool = False, 2620 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2621 parse_bracket: bool = False, 2622 ) -> t.Optional[exp.Expression]: 2623 lateral = self._parse_lateral() 2624 if lateral: 2625 return lateral 2626 2627 unnest = self._parse_unnest() 2628 if unnest: 2629 return unnest 2630 2631 values = self._parse_derived_table_values() 2632 if values: 2633 return values 2634 2635 subquery = self._parse_select(table=True) 2636 if subquery: 2637 if not subquery.args.get("pivots"): 2638 subquery.set("pivots", self._parse_pivots()) 2639 return subquery 2640 2641 bracket = parse_bracket and self._parse_bracket(None) 2642 bracket = 
self.expression(exp.Table, this=bracket) if bracket else None 2643 this = t.cast( 2644 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2645 ) 2646 2647 if schema: 2648 return self._parse_schema(this=this) 2649 2650 version = self._parse_version() 2651 2652 if version: 2653 this.set("version", version) 2654 2655 if self.ALIAS_POST_TABLESAMPLE: 2656 table_sample = self._parse_table_sample() 2657 2658 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2659 if alias: 2660 this.set("alias", alias) 2661 2662 if self._match_text_seq("AT"): 2663 this.set("index", self._parse_id_var()) 2664 2665 this.set("hints", self._parse_table_hints()) 2666 2667 if not this.args.get("pivots"): 2668 this.set("pivots", self._parse_pivots()) 2669 2670 if not self.ALIAS_POST_TABLESAMPLE: 2671 table_sample = self._parse_table_sample() 2672 2673 if table_sample: 2674 table_sample.set("this", this) 2675 this = table_sample 2676 2677 if joins: 2678 for join in iter(self._parse_join, None): 2679 this.append("joins", join) 2680 2681 return this 2682 2683 def _parse_version(self) -> t.Optional[exp.Version]: 2684 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2685 this = "TIMESTAMP" 2686 elif self._match(TokenType.VERSION_SNAPSHOT): 2687 this = "VERSION" 2688 else: 2689 return None 2690 2691 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2692 kind = self._prev.text.upper() 2693 start = self._parse_bitwise() 2694 self._match_texts(("TO", "AND")) 2695 end = self._parse_bitwise() 2696 expression: t.Optional[exp.Expression] = self.expression( 2697 exp.Tuple, expressions=[start, end] 2698 ) 2699 elif self._match_text_seq("CONTAINED", "IN"): 2700 kind = "CONTAINED IN" 2701 expression = self.expression( 2702 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2703 ) 2704 elif self._match(TokenType.ALL): 2705 kind = "ALL" 2706 expression = None 2707 else: 2708 self._match_text_seq("AS", "OF") 2709 kind = "AS 
OF" 2710 expression = self._parse_type() 2711 2712 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2713 2714 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2715 if not self._match(TokenType.UNNEST): 2716 return None 2717 2718 expressions = self._parse_wrapped_csv(self._parse_type) 2719 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2720 2721 alias = self._parse_table_alias() if with_alias else None 2722 2723 if alias: 2724 if self.UNNEST_COLUMN_ONLY: 2725 if alias.args.get("columns"): 2726 self.raise_error("Unexpected extra column alias in unnest.") 2727 2728 alias.set("columns", [alias.this]) 2729 alias.set("this", None) 2730 2731 columns = alias.args.get("columns") or [] 2732 if offset and len(expressions) < len(columns): 2733 offset = columns.pop() 2734 2735 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2736 self._match(TokenType.ALIAS) 2737 offset = self._parse_id_var( 2738 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2739 ) or exp.to_identifier("offset") 2740 2741 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2742 2743 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2744 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2745 if not is_derived and not self._match(TokenType.VALUES): 2746 return None 2747 2748 expressions = self._parse_csv(self._parse_value) 2749 alias = self._parse_table_alias() 2750 2751 if is_derived: 2752 self._match_r_paren() 2753 2754 return self.expression( 2755 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2756 ) 2757 2758 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2759 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2760 as_modifier and self._match_text_seq("USING", "SAMPLE") 2761 ): 2762 return None 2763 2764 bucket_numerator = None 2765 bucket_denominator = None 2766 
bucket_field = None 2767 percent = None 2768 rows = None 2769 size = None 2770 seed = None 2771 2772 kind = ( 2773 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2774 ) 2775 method = self._parse_var(tokens=(TokenType.ROW,)) 2776 2777 matched_l_paren = self._match(TokenType.L_PAREN) 2778 2779 if self.TABLESAMPLE_CSV: 2780 num = None 2781 expressions = self._parse_csv(self._parse_primary) 2782 else: 2783 expressions = None 2784 num = ( 2785 self._parse_factor() 2786 if self._match(TokenType.NUMBER, advance=False) 2787 else self._parse_primary() 2788 ) 2789 2790 if self._match_text_seq("BUCKET"): 2791 bucket_numerator = self._parse_number() 2792 self._match_text_seq("OUT", "OF") 2793 bucket_denominator = bucket_denominator = self._parse_number() 2794 self._match(TokenType.ON) 2795 bucket_field = self._parse_field() 2796 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2797 percent = num 2798 elif self._match(TokenType.ROWS): 2799 rows = num 2800 elif num: 2801 size = num 2802 2803 if matched_l_paren: 2804 self._match_r_paren() 2805 2806 if self._match(TokenType.L_PAREN): 2807 method = self._parse_var() 2808 seed = self._match(TokenType.COMMA) and self._parse_number() 2809 self._match_r_paren() 2810 elif self._match_texts(("SEED", "REPEATABLE")): 2811 seed = self._parse_wrapped(self._parse_number) 2812 2813 return self.expression( 2814 exp.TableSample, 2815 expressions=expressions, 2816 method=method, 2817 bucket_numerator=bucket_numerator, 2818 bucket_denominator=bucket_denominator, 2819 bucket_field=bucket_field, 2820 percent=percent, 2821 rows=rows, 2822 size=size, 2823 seed=seed, 2824 kind=kind, 2825 ) 2826 2827 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2828 return list(iter(self._parse_pivot, None)) or None 2829 2830 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2831 return list(iter(self._parse_join, None)) or None 2832 2833 # https://duckdb.org/docs/sql/statements/pivot 2834 def 
_parse_simplified_pivot(self) -> exp.Pivot: 2835 def _parse_on() -> t.Optional[exp.Expression]: 2836 this = self._parse_bitwise() 2837 return self._parse_in(this) if self._match(TokenType.IN) else this 2838 2839 this = self._parse_table() 2840 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2841 using = self._match(TokenType.USING) and self._parse_csv( 2842 lambda: self._parse_alias(self._parse_function()) 2843 ) 2844 group = self._parse_group() 2845 return self.expression( 2846 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2847 ) 2848 2849 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2850 index = self._index 2851 include_nulls = None 2852 2853 if self._match(TokenType.PIVOT): 2854 unpivot = False 2855 elif self._match(TokenType.UNPIVOT): 2856 unpivot = True 2857 2858 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2859 if self._match_text_seq("INCLUDE", "NULLS"): 2860 include_nulls = True 2861 elif self._match_text_seq("EXCLUDE", "NULLS"): 2862 include_nulls = False 2863 else: 2864 return None 2865 2866 expressions = [] 2867 field = None 2868 2869 if not self._match(TokenType.L_PAREN): 2870 self._retreat(index) 2871 return None 2872 2873 if unpivot: 2874 expressions = self._parse_csv(self._parse_column) 2875 else: 2876 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2877 2878 if not expressions: 2879 self.raise_error("Failed to parse PIVOT's aggregation list") 2880 2881 if not self._match(TokenType.FOR): 2882 self.raise_error("Expecting FOR") 2883 2884 value = self._parse_column() 2885 2886 if not self._match(TokenType.IN): 2887 self.raise_error("Expecting IN") 2888 2889 field = self._parse_in(value, alias=True) 2890 2891 self._match_r_paren() 2892 2893 pivot = self.expression( 2894 exp.Pivot, 2895 expressions=expressions, 2896 field=field, 2897 unpivot=unpivot, 2898 include_nulls=include_nulls, 2899 ) 2900 2901 if not 
self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2902 pivot.set("alias", self._parse_table_alias()) 2903 2904 if not unpivot: 2905 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2906 2907 columns: t.List[exp.Expression] = [] 2908 for fld in pivot.args["field"].expressions: 2909 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2910 for name in names: 2911 if self.PREFIXED_PIVOT_COLUMNS: 2912 name = f"{name}_{field_name}" if name else field_name 2913 else: 2914 name = f"{field_name}_{name}" if name else field_name 2915 2916 columns.append(exp.to_identifier(name)) 2917 2918 pivot.set("columns", columns) 2919 2920 return pivot 2921 2922 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2923 return [agg.alias for agg in aggregations] 2924 2925 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2926 if not skip_where_token and not self._match(TokenType.WHERE): 2927 return None 2928 2929 return self.expression( 2930 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2931 ) 2932 2933 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2934 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2935 return None 2936 2937 elements = defaultdict(list) 2938 2939 if self._match(TokenType.ALL): 2940 return self.expression(exp.Group, all=True) 2941 2942 while True: 2943 expressions = self._parse_csv(self._parse_conjunction) 2944 if expressions: 2945 elements["expressions"].extend(expressions) 2946 2947 grouping_sets = self._parse_grouping_sets() 2948 if grouping_sets: 2949 elements["grouping_sets"].extend(grouping_sets) 2950 2951 rollup = None 2952 cube = None 2953 totals = None 2954 2955 with_ = self._match(TokenType.WITH) 2956 if self._match(TokenType.ROLLUP): 2957 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2958 
elements["rollup"].extend(ensure_list(rollup)) 2959 2960 if self._match(TokenType.CUBE): 2961 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2962 elements["cube"].extend(ensure_list(cube)) 2963 2964 if self._match_text_seq("TOTALS"): 2965 totals = True 2966 elements["totals"] = True # type: ignore 2967 2968 if not (grouping_sets or rollup or cube or totals): 2969 break 2970 2971 return self.expression(exp.Group, **elements) # type: ignore 2972 2973 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2974 if not self._match(TokenType.GROUPING_SETS): 2975 return None 2976 2977 return self._parse_wrapped_csv(self._parse_grouping_set) 2978 2979 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2980 if self._match(TokenType.L_PAREN): 2981 grouping_set = self._parse_csv(self._parse_column) 2982 self._match_r_paren() 2983 return self.expression(exp.Tuple, expressions=grouping_set) 2984 2985 return self._parse_column() 2986 2987 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2988 if not skip_having_token and not self._match(TokenType.HAVING): 2989 return None 2990 return self.expression(exp.Having, this=self._parse_conjunction()) 2991 2992 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2993 if not self._match(TokenType.QUALIFY): 2994 return None 2995 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2996 2997 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2998 if skip_start_token: 2999 start = None 3000 elif self._match(TokenType.START_WITH): 3001 start = self._parse_conjunction() 3002 else: 3003 return None 3004 3005 self._match(TokenType.CONNECT_BY) 3006 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3007 exp.Prior, this=self._parse_bitwise() 3008 ) 3009 connect = self._parse_conjunction() 3010 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3011 3012 if not start and self._match(TokenType.START_WITH): 3013 
start = self._parse_conjunction() 3014 3015 return self.expression(exp.Connect, start=start, connect=connect) 3016 3017 def _parse_order( 3018 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3019 ) -> t.Optional[exp.Expression]: 3020 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3021 return this 3022 3023 return self.expression( 3024 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3025 ) 3026 3027 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3028 if not self._match(token): 3029 return None 3030 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3031 3032 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3033 this = parse_method() if parse_method else self._parse_conjunction() 3034 3035 asc = self._match(TokenType.ASC) 3036 desc = self._match(TokenType.DESC) or (asc and False) 3037 3038 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3039 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3040 3041 nulls_first = is_nulls_first or False 3042 explicitly_null_ordered = is_nulls_first or is_nulls_last 3043 3044 if ( 3045 not explicitly_null_ordered 3046 and ( 3047 (not desc and self.NULL_ORDERING == "nulls_are_small") 3048 or (desc and self.NULL_ORDERING != "nulls_are_small") 3049 ) 3050 and self.NULL_ORDERING != "nulls_are_last" 3051 ): 3052 nulls_first = True 3053 3054 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3055 3056 def _parse_limit( 3057 self, this: t.Optional[exp.Expression] = None, top: bool = False 3058 ) -> t.Optional[exp.Expression]: 3059 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3060 comments = self._prev_comments 3061 if top: 3062 limit_paren = self._match(TokenType.L_PAREN) 3063 expression = self._parse_number() 3064 3065 if limit_paren: 3066 self._match_r_paren() 3067 else: 3068 expression = self._parse_term() 
            # LIMIT x, y — the first number is actually the offset.
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            # FETCH {FIRST | NEXT} <count> [PERCENT] {ROW | ROWS} [ONLY | WITH TIES]
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            # NOTE(review): `only` is validated but not stored on exp.Fetch —
            # presumably ONLY is treated as the default; confirm before relying on it.
            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # Parses an OFFSET clause wrapping `this`; the ROW/ROWS noise word is consumed.
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        # Parses zero or more locking clauses: FOR UPDATE, FOR SHARE or
        # LOCK IN SHARE MODE, each optionally followed by OF <tables> and a
        # wait policy (NOWAIT / WAIT <n> / SKIP LOCKED).
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # `wait` is tri-state plus expression: True = NOWAIT,
            # <expr> = WAIT <n>, False = SKIP LOCKED, None = no wait policy.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Parses UNION / EXCEPT / INTERSECT chains; recurses so the right-hand
        # side may itself be followed by further set operations.
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        # Note: argument order matters — DISTINCT/ALL must be consumed before
        # BY NAME, which must be consumed before the right-hand SELECT.
        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        # An expression is a conjunction with an optional alias.
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        # Parses range-style predicates (BETWEEN, IN, LIKE, ... via RANGE_PARSERS)
        # as well as ISNULL / NOTNULL / IS, with an optional leading NOT.
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            # x NOTNULL is rewritten as NOT (x IS NULL).
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Parses the right-hand side of IS: [NOT] DISTINCT FROM <expr>, NULL, or
        # a boolean literal. Retreats to before IS and returns None otherwise.
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM == null-safe equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        # IN's right-hand side can be an UNNEST, a parenthesized subquery or
        # expression list, or a bare field.
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        # <this> BETWEEN <low> AND <high>
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wraps `this` in an ESCAPE clause (used after LIKE-style operators).
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        # Parses an INTERVAL expression; retreats and returns None when the
        # tokens following INTERVAL don't form a valid interval.
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                # Split e.g. '5 day' into a '5' literal plus a `day` unit var.
                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        # Parses left-associative bitwise operators, plus ?? (DQMARK) as
        # COALESCE and shifts spelled as two consecutive < < / > > tokens.
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        # Tries INTERVAL first, then a cast-style "<type> <literal>" form
        # (e.g. DATE '2020-01-01'); otherwise falls back to a plain column.
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # "<type> <literal>" — either a dialect-specific literal parser
                # or a plain CAST.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        # A type parameter, e.g. the 10 in DECIMAL(10, 2), with an optional
        # trailing variable.
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        # Parses a (possibly nested) data type. Returns None and retreats when
        # the upcoming tokens don't form a type.
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Not a known type token — maybe an identifier that tokenizes to a
            # type, or a user-defined (possibly dotted) type name.
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A parenthesized form might still be a function call — see the
            # check_func disambiguation below.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # ARRAY<...> / MAP<...> / STRUCT<...> style nested types.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional trailing value list, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            # INTERVAL [<unit> [TO <unit>]], e.g. INTERVAL DAY TO SECOND.
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Disambiguation: a parenthesized "type" is only treated as a type
            # when a string literal follows (e.g. DECIMAL(3) '1.2'); otherwise
            # assume it was a function call and retreat completely.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs denote array types, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        # One struct field: <name>[:] <type> (the colon separator is optional).
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Applies trailing column operators to `this`: brackets, :: casts,
        # dotted access and dialect-specific COLUMN_OPERATORS.
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # ::<type> cast
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers one level: x.y.z becomes column z with
                # table y and db x.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        # Parses a primary expression: a literal (via PRIMARY_PARSERS), a
        # leading-dot number like .5, or a parenthesized expression/subquery.
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated, e.g. 'a' 'b'.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        # A field is a primary, a function call, or an identifier — in that order.
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # Parses a function call. `anonymous` forces an exp.Anonymous node;
        # `optional_parens` allows no-paren functions like CURRENT_DATE.
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            # EXISTS/ANY/ALL-style predicates wrapping a subquery.
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                # Keep the original (unnormalized) name around for generation.
                if not self.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        # Parses a possibly-dotted UDF name with an optional parameter list.
        # `kind` is unused in this base implementation.
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        # Character-set introducer, e.g. _utf8'abc'; falls back to an identifier
        # when no literal follows.
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        # Parses [<kind>.]<name>, e.g. @@session.sql_mode.
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        # Tries to parse a lambda (x -> ..., (x, y) -> ...); otherwise falls
        # back to DISTINCT or a plain select/expression argument.
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # Parses an optional parenthesized schema (column defs / constraints).
        # First speculatively tries a nested SELECT, retreating afterwards.
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Speculative parse: discard any errors and rewind.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        # "<name> AS <expr>" with no type is a computed column.
        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        # Neither a type nor constraints — this wasn't a column def at all.
        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        # AUTO_INCREMENT, optionally with (start, increment) or
        # START <n> INCREMENT <n> parameters.
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
        # GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS [IDENTITY] [( ... )]
        # `this` is True for ALWAYS, False for BY DEFAULT.
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            # Without IDENTITY the parenthesized part is a generated expression.
            if not identity:
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        # Constraints introduced by NOT: NULL, CASESPECIFIC, FOR REPLICATION.
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        # An optionally named ([CONSTRAINT <name>]) column constraint.
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        # A named (CONSTRAINT <name> ...) or unnamed schema-level constraint.
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        # Dispatches to the matching CONSTRAINT_PARSERS entry; quoted
        # identifiers are never constraint keywords.
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        # UNIQUE [KEY] [(cols)] [USING <index type>]
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        # Collects trailing key constraint options as raw strings, e.g.
        # "ON DELETE CASCADE", "NOT ENFORCED", "DEFERRABLE".
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        # REFERENCES <table> [<options>]
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE(review): expressions is always None here — referenced columns
        # are not parsed separately; the table is parsed with schema=True.
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        # FOREIGN KEY (cols) [REFERENCES ...] [ON {DELETE | UPDATE} <action>]...
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        # Column-level (no paren list) vs table-level (parenthesized) PRIMARY KEY.
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Parses [...] subscripts / array literals and {...} struct literals;
        # recurses to handle chained brackets.
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading colon means a slice with no start, e.g. x[:3].
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Plain subscript — normalize indices by the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        # CASE [<operand>] WHEN ... THEN ... [ELSE ...] END
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        # Supports both IF(cond, true, false) and IF cond THEN ... [ELSE ...] END.
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)] — NEXT was already
        # consumed, so retreat one token if VALUE FOR doesn't follow.
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        # EXTRACT(<part> FROM <expr>) — a comma is accepted in place of FROM.
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        # ANY_VALUE(<expr> [HAVING {MAX | MIN} <expr>])
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        # CAST(<expr> AS <type> [FORMAT <fmt>]); a comma instead of AS yields
        # a CastToStrType node.
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt =
self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST(... AS DATE/TIMESTAMP FORMAT 'fmt') is normalized into
                # STR_TO_DATE / STR_TO_TIME with the dialect's time-format mapping.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT arguments, normalizing NULL handling per dialect."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT_WS(<delimiter>, <values>...)."""
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT, including ORDER BY / LIMIT / WITHIN GROUP forms."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: NULL-safe equality (both sides NULL also matches).
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of trailing args means the last one is the default result.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a [KEY] <key> VALUE/":"/"," <value> pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wraps `this` when a FORMAT JSON clause follows it.
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e.
# NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) with its NULL / UNIQUE KEYS / RETURNING / ENCODING clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        # The COLUMNS (<column defs>) clause of JSON_TABLE.
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(<doc> [, <path>] [... ON ERROR] [... ON EMPTY] COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL MATCH(<cols>) AGAINST(<string> [<search modifier>])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One column spec of the WITH (...) clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(<needle> IN <haystack>) or comma-separated POSITION/LOCATE args."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT(MODEL <model>, TABLE <table> [, <params struct>])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        # A hint of the form HINT_NAME(t1, t2, ...).
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(<chars> FROM <string>) lists the pattern first; swap when needed.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # The WINDOW <name> AS (...) [, ...] clause of a SELECT.
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Optional IGNORE NULLS / RESPECT NULLS wrapper around `this`.
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse FILTER / WITHIN GROUP / OVER window suffixes after a function call."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this =
self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> form (reference to a named window).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Window frame: ROWS/RANGE BETWEEN <start> AND <end>.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary: UNBOUNDED / CURRENT ROW / <expr>, plus its side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or a parenthesized alias list) following an expression."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            # Caller requires the AS keyword; without it there is no alias.
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name, optionally accepting (almost) any token."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # A quoted string used in a position where an identifier is expected.
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        # Consume and return the next token unless it is a reserved keyword.
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) ->
t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        # Parameter reference, optionally brace-wrapped, e.g. @x or @{x}.
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Not actually a placeholder; give the token back.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse the EXCEPT (<cols>) modifier of SELECT *."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse the REPLACE (<exprs>) modifier of SELECT *."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-delimited list via `parse_method`, dropping empty items."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-associatively fold binary operators from the `expressions` token map."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Parse `( ... )`; the parentheses are required unless `optional` is True."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        # The SELECT part of e.g. CREATE TABLE ... AS SELECT.
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START TRANSACTION with its optional, comma-separated mode list."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A mode is a run of VAR tokens, e.g. ISOLATION LEVEL READ COMMITTED.
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT / ROLLBACK [TO SAVEPOINT <name>] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER <col>]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        # DROP [COLUMN] ... inside ALTER TABLE; the kind defaults to COLUMN.
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition,
expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse ADD [CONSTRAINT <name>] CHECK / FOREIGN KEY / PRIMARY KEY ... ."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Dispatch the ADD ... part of ALTER TABLE to constraint or column parsing."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            # Dialects where ADD is followed directly by column defs (no COLUMN keyword).
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER [COLUMN] <col> DROP DEFAULT / SET DEFAULT / [SET DATA] TYPE ... ."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP ... part of ALTER TABLE (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; unrecognized forms fall back to a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                # Every token was consumed: a fully parsed ALTER TABLE.
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... ."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND <cond>] THEN ... clauses."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        # Dialect-specific SHOW statements via the SHOW trie; otherwise a raw Command.
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item: [GLOBAL|SESSION] <name> = <value> (or TRANSACTION ...)."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or
(self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5105 self._retreat(index) 5106 return None 5107 5108 right = self._parse_statement() or self._parse_id_var() 5109 this = self.expression(exp.EQ, this=left, expression=right) 5110 5111 return self.expression(exp.SetItem, this=this, kind=kind) 5112 5113 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5114 self._match_text_seq("TRANSACTION") 5115 characteristics = self._parse_csv( 5116 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5117 ) 5118 return self.expression( 5119 exp.SetItem, 5120 expressions=characteristics, 5121 kind="TRANSACTION", 5122 **{"global": global_}, # type: ignore 5123 ) 5124 5125 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5126 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5127 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5128 5129 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5130 index = self._index 5131 set_ = self.expression( 5132 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5133 ) 5134 5135 if self._curr: 5136 self._retreat(index) 5137 return self._parse_as_command(self._prev) 5138 5139 return set_ 5140 5141 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5142 for option in options: 5143 if self._match_text_seq(*option.split(" ")): 5144 return exp.var(option) 5145 return None 5146 5147 def _parse_as_command(self, start: Token) -> exp.Command: 5148 while self._curr: 5149 self._advance() 5150 text = self._find_sql(start, self._prev) 5151 size = len(start.text) 5152 return exp.Command(this=text[:size], expression=text[size:]) 5153 5154 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5155 settings = [] 5156 5157 self._match_l_paren() 5158 kind = self._parse_id_var() 5159 5160 if self._match(TokenType.L_PAREN): 5161 while True: 5162 key = 
self._parse_id_var() 5163 value = self._parse_primary() 5164 5165 if not key and value is None: 5166 break 5167 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5168 self._match(TokenType.R_PAREN) 5169 5170 self._match_r_paren() 5171 5172 return self.expression( 5173 exp.DictProperty, 5174 this=this, 5175 kind=kind.this if kind else None, 5176 settings=settings, 5177 ) 5178 5179 def _parse_dict_range(self, this: str) -> exp.DictRange: 5180 self._match_l_paren() 5181 has_min = self._match_text_seq("MIN") 5182 if has_min: 5183 min = self._parse_var() or self._parse_primary() 5184 self._match_text_seq("MAX") 5185 max = self._parse_var() or self._parse_primary() 5186 else: 5187 max = self._parse_var() or self._parse_primary() 5188 min = exp.Literal.number(0) 5189 self._match_r_paren() 5190 return self.expression(exp.DictRange, this=this, min=min, max=max) 5191 5192 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5193 index = self._index 5194 expression = self._parse_column() 5195 if not self._match(TokenType.IN): 5196 self._retreat(index - 1) 5197 return None 5198 iterator = self._parse_column() 5199 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5200 return self.expression( 5201 exp.Comprehension, 5202 this=this, 5203 expression=expression, 5204 iterator=iterator, 5205 condition=condition, 5206 ) 5207 5208 def _find_parser( 5209 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5210 ) -> t.Optional[t.Callable]: 5211 if not self._curr: 5212 return None 5213 5214 index = self._index 5215 this = [] 5216 while True: 5217 # The current token might be multiple words 5218 curr = self._curr.text.upper() 5219 key = curr.split(" ") 5220 this.append(curr) 5221 5222 self._advance() 5223 result, trie = in_trie(trie, key) 5224 if result == TrieResult.FAILED: 5225 break 5226 5227 if result == TrieResult.EXISTS: 5228 subparser = parsers[" ".join(this)] 5229 return subparser 5230 5231 
self._retreat(index) 5232 return None 5233 5234 def _match(self, token_type, advance=True, expression=None): 5235 if not self._curr: 5236 return None 5237 5238 if self._curr.token_type == token_type: 5239 if advance: 5240 self._advance() 5241 self._add_comments(expression) 5242 return True 5243 5244 return None 5245 5246 def _match_set(self, types, advance=True): 5247 if not self._curr: 5248 return None 5249 5250 if self._curr.token_type in types: 5251 if advance: 5252 self._advance() 5253 return True 5254 5255 return None 5256 5257 def _match_pair(self, token_type_a, token_type_b, advance=True): 5258 if not self._curr or not self._next: 5259 return None 5260 5261 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5262 if advance: 5263 self._advance(2) 5264 return True 5265 5266 return None 5267 5268 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5269 if not self._match(TokenType.L_PAREN, expression=expression): 5270 self.raise_error("Expecting (") 5271 5272 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5273 if not self._match(TokenType.R_PAREN, expression=expression): 5274 self.raise_error("Expecting )") 5275 5276 def _match_texts(self, texts, advance=True): 5277 if self._curr and self._curr.text.upper() in texts: 5278 if advance: 5279 self._advance() 5280 return True 5281 return False 5282 5283 def _match_text_seq(self, *texts, advance=True): 5284 index = self._index 5285 for text in texts: 5286 if self._curr and self._curr.text.upper() == text: 5287 self._advance() 5288 else: 5289 self._retreat(index) 5290 return False 5291 5292 if not advance: 5293 self._retreat(index) 5294 5295 return True 5296 5297 @t.overload 5298 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5299 ... 5300 5301 @t.overload 5302 def _replace_columns_with_dots( 5303 self, this: t.Optional[exp.Expression] 5304 ) -> t.Optional[exp.Expression]: 5305 ... 
5306 5307 def _replace_columns_with_dots(self, this): 5308 if isinstance(this, exp.Dot): 5309 exp.replace_children(this, self._replace_columns_with_dots) 5310 elif isinstance(this, exp.Column): 5311 exp.replace_children(this, self._replace_columns_with_dots) 5312 table = this.args.get("table") 5313 this = ( 5314 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5315 ) 5316 5317 return this 5318 5319 def _replace_lambda( 5320 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5321 ) -> t.Optional[exp.Expression]: 5322 if not node: 5323 return node 5324 5325 for column in node.find_all(exp.Column): 5326 if column.parts[0].name in lambda_variables: 5327 dot_or_id = column.to_dot() if column.table else column.this 5328 parent = column.parent 5329 5330 while isinstance(parent, exp.Dot): 5331 if not isinstance(parent.parent, exp.Dot): 5332 parent.replace(dot_or_id) 5333 break 5334 parent = parent.parent 5335 else: 5336 if column is node: 5337 node = dot_or_id 5338 else: 5339 column.replace(dot_or_id) 5340 return node 5341 5342 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5343 return [ 5344 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5345 for value in values 5346 if value 5347 ]
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from a flat [key, value, key, value, ...] list.

    A single star argument yields a StarMap; otherwise the alternating
    keys and values are collected into a VarMap of two parallel Arrays.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Pair up alternating entries; an odd-length list fails on args[i + 1],
    # matching the original behavior for malformed input.
    pairs = [(args[i], args[i + 1]) for i in range(0, len(args), 2)]

    return exp.VarMap(
        keys=exp.Array(expressions=[key for key, _ in pairs]),
        values=exp.Array(expressions=[value for _, value in pairs]),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 
TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMP_S, 165 TokenType.TIMESTAMP_MS, 166 TokenType.TIMESTAMP_NS, 167 TokenType.TIMESTAMPTZ, 168 TokenType.TIMESTAMPLTZ, 169 TokenType.DATETIME, 170 TokenType.DATETIME64, 171 TokenType.DATE, 172 TokenType.INT4RANGE, 173 TokenType.INT4MULTIRANGE, 174 TokenType.INT8RANGE, 175 TokenType.INT8MULTIRANGE, 176 TokenType.NUMRANGE, 177 TokenType.NUMMULTIRANGE, 178 TokenType.TSRANGE, 179 TokenType.TSMULTIRANGE, 180 TokenType.TSTZRANGE, 181 TokenType.TSTZMULTIRANGE, 182 TokenType.DATERANGE, 183 TokenType.DATEMULTIRANGE, 184 TokenType.DECIMAL, 185 TokenType.UDECIMAL, 186 TokenType.BIGDECIMAL, 187 TokenType.UUID, 188 TokenType.GEOGRAPHY, 189 TokenType.GEOMETRY, 190 TokenType.HLLSKETCH, 191 TokenType.HSTORE, 192 TokenType.PSEUDO_TYPE, 193 TokenType.SUPER, 194 TokenType.SERIAL, 195 TokenType.SMALLSERIAL, 196 TokenType.BIGSERIAL, 197 TokenType.XML, 198 TokenType.YEAR, 199 TokenType.UNIQUEIDENTIFIER, 200 TokenType.USERDEFINED, 201 TokenType.MONEY, 202 TokenType.SMALLMONEY, 203 TokenType.ROWVERSION, 204 TokenType.IMAGE, 205 TokenType.VARIANT, 206 TokenType.OBJECT, 207 TokenType.OBJECT_IDENTIFIER, 208 TokenType.INET, 209 TokenType.IPADDRESS, 210 
TokenType.IPPREFIX, 211 TokenType.UNKNOWN, 212 TokenType.NULL, 213 *ENUM_TYPE_TOKENS, 214 *NESTED_TYPE_TOKENS, 215 } 216 217 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 218 TokenType.BIGINT: TokenType.UBIGINT, 219 TokenType.INT: TokenType.UINT, 220 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 221 TokenType.SMALLINT: TokenType.USMALLINT, 222 TokenType.TINYINT: TokenType.UTINYINT, 223 TokenType.DECIMAL: TokenType.UDECIMAL, 224 } 225 226 SUBQUERY_PREDICATES = { 227 TokenType.ANY: exp.Any, 228 TokenType.ALL: exp.All, 229 TokenType.EXISTS: exp.Exists, 230 TokenType.SOME: exp.Any, 231 } 232 233 RESERVED_KEYWORDS = { 234 *Tokenizer.SINGLE_TOKENS.values(), 235 TokenType.SELECT, 236 } 237 238 DB_CREATABLES = { 239 TokenType.DATABASE, 240 TokenType.SCHEMA, 241 TokenType.TABLE, 242 TokenType.VIEW, 243 TokenType.MODEL, 244 TokenType.DICTIONARY, 245 } 246 247 CREATABLES = { 248 TokenType.COLUMN, 249 TokenType.FUNCTION, 250 TokenType.INDEX, 251 TokenType.PROCEDURE, 252 *DB_CREATABLES, 253 } 254 255 # Tokens that can represent identifiers 256 ID_VAR_TOKENS = { 257 TokenType.VAR, 258 TokenType.ANTI, 259 TokenType.APPLY, 260 TokenType.ASC, 261 TokenType.AUTO_INCREMENT, 262 TokenType.BEGIN, 263 TokenType.CACHE, 264 TokenType.CASE, 265 TokenType.COLLATE, 266 TokenType.COMMAND, 267 TokenType.COMMENT, 268 TokenType.COMMIT, 269 TokenType.CONSTRAINT, 270 TokenType.DEFAULT, 271 TokenType.DELETE, 272 TokenType.DESC, 273 TokenType.DESCRIBE, 274 TokenType.DICTIONARY, 275 TokenType.DIV, 276 TokenType.END, 277 TokenType.EXECUTE, 278 TokenType.ESCAPE, 279 TokenType.FALSE, 280 TokenType.FIRST, 281 TokenType.FILTER, 282 TokenType.FORMAT, 283 TokenType.FULL, 284 TokenType.IS, 285 TokenType.ISNULL, 286 TokenType.INTERVAL, 287 TokenType.KEEP, 288 TokenType.KILL, 289 TokenType.LEFT, 290 TokenType.LOAD, 291 TokenType.MERGE, 292 TokenType.NATURAL, 293 TokenType.NEXT, 294 TokenType.OFFSET, 295 TokenType.ORDINALITY, 296 TokenType.OVERLAPS, 297 TokenType.OVERWRITE, 298 TokenType.PARTITION, 299 TokenType.PERCENT, 300 
TokenType.PIVOT, 301 TokenType.PRAGMA, 302 TokenType.RANGE, 303 TokenType.REFERENCES, 304 TokenType.RIGHT, 305 TokenType.ROW, 306 TokenType.ROWS, 307 TokenType.SEMI, 308 TokenType.SET, 309 TokenType.SETTINGS, 310 TokenType.SHOW, 311 TokenType.TEMPORARY, 312 TokenType.TOP, 313 TokenType.TRUE, 314 TokenType.UNIQUE, 315 TokenType.UNPIVOT, 316 TokenType.UPDATE, 317 TokenType.VOLATILE, 318 TokenType.WINDOW, 319 *CREATABLES, 320 *SUBQUERY_PREDICATES, 321 *TYPE_TOKENS, 322 *NO_PAREN_FUNCTIONS, 323 } 324 325 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 326 327 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 328 TokenType.ANTI, 329 TokenType.APPLY, 330 TokenType.ASOF, 331 TokenType.FULL, 332 TokenType.LEFT, 333 TokenType.LOCK, 334 TokenType.NATURAL, 335 TokenType.OFFSET, 336 TokenType.RIGHT, 337 TokenType.SEMI, 338 TokenType.WINDOW, 339 } 340 341 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 342 343 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 344 345 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 346 347 FUNC_TOKENS = { 348 TokenType.COLLATE, 349 TokenType.COMMAND, 350 TokenType.CURRENT_DATE, 351 TokenType.CURRENT_DATETIME, 352 TokenType.CURRENT_TIMESTAMP, 353 TokenType.CURRENT_TIME, 354 TokenType.CURRENT_USER, 355 TokenType.FILTER, 356 TokenType.FIRST, 357 TokenType.FORMAT, 358 TokenType.GLOB, 359 TokenType.IDENTIFIER, 360 TokenType.INDEX, 361 TokenType.ISNULL, 362 TokenType.ILIKE, 363 TokenType.INSERT, 364 TokenType.LIKE, 365 TokenType.MERGE, 366 TokenType.OFFSET, 367 TokenType.PRIMARY_KEY, 368 TokenType.RANGE, 369 TokenType.REPLACE, 370 TokenType.RLIKE, 371 TokenType.ROW, 372 TokenType.UNNEST, 373 TokenType.VAR, 374 TokenType.LEFT, 375 TokenType.RIGHT, 376 TokenType.DATE, 377 TokenType.DATETIME, 378 TokenType.TABLE, 379 TokenType.TIMESTAMP, 380 TokenType.TIMESTAMPTZ, 381 TokenType.WINDOW, 382 TokenType.XOR, 383 *TYPE_TOKENS, 384 *SUBQUERY_PREDICATES, 385 } 386 387 CONJUNCTION = { 388 TokenType.AND: exp.And, 389 TokenType.OR: exp.Or, 390 } 391 
392 EQUALITY = { 393 TokenType.EQ: exp.EQ, 394 TokenType.NEQ: exp.NEQ, 395 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 396 } 397 398 COMPARISON = { 399 TokenType.GT: exp.GT, 400 TokenType.GTE: exp.GTE, 401 TokenType.LT: exp.LT, 402 TokenType.LTE: exp.LTE, 403 } 404 405 BITWISE = { 406 TokenType.AMP: exp.BitwiseAnd, 407 TokenType.CARET: exp.BitwiseXor, 408 TokenType.PIPE: exp.BitwiseOr, 409 TokenType.DPIPE: exp.DPipe, 410 } 411 412 TERM = { 413 TokenType.DASH: exp.Sub, 414 TokenType.PLUS: exp.Add, 415 TokenType.MOD: exp.Mod, 416 TokenType.COLLATE: exp.Collate, 417 } 418 419 FACTOR = { 420 TokenType.DIV: exp.IntDiv, 421 TokenType.LR_ARROW: exp.Distance, 422 TokenType.SLASH: exp.Div, 423 TokenType.STAR: exp.Mul, 424 } 425 426 TIMES = { 427 TokenType.TIME, 428 TokenType.TIMETZ, 429 } 430 431 TIMESTAMPS = { 432 TokenType.TIMESTAMP, 433 TokenType.TIMESTAMPTZ, 434 TokenType.TIMESTAMPLTZ, 435 *TIMES, 436 } 437 438 SET_OPERATIONS = { 439 TokenType.UNION, 440 TokenType.INTERSECT, 441 TokenType.EXCEPT, 442 } 443 444 JOIN_METHODS = { 445 TokenType.NATURAL, 446 TokenType.ASOF, 447 } 448 449 JOIN_SIDES = { 450 TokenType.LEFT, 451 TokenType.RIGHT, 452 TokenType.FULL, 453 } 454 455 JOIN_KINDS = { 456 TokenType.INNER, 457 TokenType.OUTER, 458 TokenType.CROSS, 459 TokenType.SEMI, 460 TokenType.ANTI, 461 } 462 463 JOIN_HINTS: t.Set[str] = set() 464 465 LAMBDAS = { 466 TokenType.ARROW: lambda self, expressions: self.expression( 467 exp.Lambda, 468 this=self._replace_lambda( 469 self._parse_conjunction(), 470 {node.name for node in expressions}, 471 ), 472 expressions=expressions, 473 ), 474 TokenType.FARROW: lambda self, expressions: self.expression( 475 exp.Kwarg, 476 this=exp.var(expressions[0].name), 477 expression=self._parse_conjunction(), 478 ), 479 } 480 481 COLUMN_OPERATORS = { 482 TokenType.DOT: None, 483 TokenType.DCOLON: lambda self, this, to: self.expression( 484 exp.Cast if self.STRICT_CAST else exp.TryCast, 485 this=this, 486 to=to, 487 ), 488 TokenType.ARROW: lambda self, 
this, path: self.expression( 489 exp.JSONExtract, 490 this=this, 491 expression=path, 492 ), 493 TokenType.DARROW: lambda self, this, path: self.expression( 494 exp.JSONExtractScalar, 495 this=this, 496 expression=path, 497 ), 498 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 499 exp.JSONBExtract, 500 this=this, 501 expression=path, 502 ), 503 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 504 exp.JSONBExtractScalar, 505 this=this, 506 expression=path, 507 ), 508 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 509 exp.JSONBContains, 510 this=this, 511 expression=key, 512 ), 513 } 514 515 EXPRESSION_PARSERS = { 516 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 517 exp.Column: lambda self: self._parse_column(), 518 exp.Condition: lambda self: self._parse_conjunction(), 519 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 520 exp.Expression: lambda self: self._parse_statement(), 521 exp.From: lambda self: self._parse_from(), 522 exp.Group: lambda self: self._parse_group(), 523 exp.Having: lambda self: self._parse_having(), 524 exp.Identifier: lambda self: self._parse_id_var(), 525 exp.Join: lambda self: self._parse_join(), 526 exp.Lambda: lambda self: self._parse_lambda(), 527 exp.Lateral: lambda self: self._parse_lateral(), 528 exp.Limit: lambda self: self._parse_limit(), 529 exp.Offset: lambda self: self._parse_offset(), 530 exp.Order: lambda self: self._parse_order(), 531 exp.Ordered: lambda self: self._parse_ordered(), 532 exp.Properties: lambda self: self._parse_properties(), 533 exp.Qualify: lambda self: self._parse_qualify(), 534 exp.Returning: lambda self: self._parse_returning(), 535 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 536 exp.Table: lambda self: self._parse_table_parts(), 537 exp.TableAlias: lambda self: self._parse_table_alias(), 538 exp.Where: lambda self: self._parse_where(), 539 exp.Window: lambda self: 
self._parse_named_window(), 540 exp.With: lambda self: self._parse_with(), 541 "JOIN_TYPE": lambda self: self._parse_join_parts(), 542 } 543 544 STATEMENT_PARSERS = { 545 TokenType.ALTER: lambda self: self._parse_alter(), 546 TokenType.BEGIN: lambda self: self._parse_transaction(), 547 TokenType.CACHE: lambda self: self._parse_cache(), 548 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 549 TokenType.COMMENT: lambda self: self._parse_comment(), 550 TokenType.CREATE: lambda self: self._parse_create(), 551 TokenType.DELETE: lambda self: self._parse_delete(), 552 TokenType.DESC: lambda self: self._parse_describe(), 553 TokenType.DESCRIBE: lambda self: self._parse_describe(), 554 TokenType.DROP: lambda self: self._parse_drop(), 555 TokenType.INSERT: lambda self: self._parse_insert(), 556 TokenType.KILL: lambda self: self._parse_kill(), 557 TokenType.LOAD: lambda self: self._parse_load(), 558 TokenType.MERGE: lambda self: self._parse_merge(), 559 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 560 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 561 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 562 TokenType.SET: lambda self: self._parse_set(), 563 TokenType.UNCACHE: lambda self: self._parse_uncache(), 564 TokenType.UPDATE: lambda self: self._parse_update(), 565 TokenType.USE: lambda self: self.expression( 566 exp.Use, 567 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 568 and exp.var(self._prev.text), 569 this=self._parse_table(schema=False), 570 ), 571 } 572 573 UNARY_PARSERS = { 574 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 575 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 576 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 577 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 578 } 579 580 PRIMARY_PARSERS = 
{ 581 TokenType.STRING: lambda self, token: self.expression( 582 exp.Literal, this=token.text, is_string=True 583 ), 584 TokenType.NUMBER: lambda self, token: self.expression( 585 exp.Literal, this=token.text, is_string=False 586 ), 587 TokenType.STAR: lambda self, _: self.expression( 588 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 589 ), 590 TokenType.NULL: lambda self, _: self.expression(exp.Null), 591 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 592 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 593 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 594 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 595 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 596 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 597 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 598 exp.National, this=token.text 599 ), 600 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 601 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 602 exp.RawString, this=token.text 603 ), 604 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 605 } 606 607 PLACEHOLDER_PARSERS = { 608 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 609 TokenType.PARAMETER: lambda self: self._parse_parameter(), 610 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 611 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 612 else None, 613 } 614 615 RANGE_PARSERS = { 616 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 617 TokenType.GLOB: binary_range_parser(exp.Glob), 618 TokenType.ILIKE: binary_range_parser(exp.ILike), 619 TokenType.IN: lambda self, this: self._parse_in(this), 620 TokenType.IRLIKE: 
binary_range_parser(exp.RegexpILike), 621 TokenType.IS: lambda self, this: self._parse_is(this), 622 TokenType.LIKE: binary_range_parser(exp.Like), 623 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 624 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 625 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 626 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 627 } 628 629 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 630 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 631 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 632 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 633 "CHARACTER SET": lambda self: self._parse_character_set(), 634 "CHECKSUM": lambda self: self._parse_checksum(), 635 "CLUSTER BY": lambda self: self._parse_cluster(), 636 "CLUSTERED": lambda self: self._parse_clustered_by(), 637 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 638 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 639 "COPY": lambda self: self._parse_copy_property(), 640 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 641 "DEFINER": lambda self: self._parse_definer(), 642 "DETERMINISTIC": lambda self: self.expression( 643 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 644 ), 645 "DISTKEY": lambda self: self._parse_distkey(), 646 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 647 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 648 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 649 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 650 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 651 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 652 "FREESPACE": lambda self: self._parse_freespace(), 653 
"HEAP": lambda self: self.expression(exp.HeapProperty), 654 "IMMUTABLE": lambda self: self.expression( 655 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 656 ), 657 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 658 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 659 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 660 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 661 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 662 "LIKE": lambda self: self._parse_create_like(), 663 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 664 "LOCK": lambda self: self._parse_locking(), 665 "LOCKING": lambda self: self._parse_locking(), 666 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 667 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 668 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 669 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 670 "NO": lambda self: self._parse_no_property(), 671 "ON": lambda self: self._parse_on_property(), 672 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 673 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 674 "PARTITION BY": lambda self: self._parse_partitioned_by(), 675 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 676 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 677 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 678 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 679 "REMOTE": lambda self: self._parse_remote_with_connection(), 680 "RETURNS": lambda self: self._parse_returns(), 681 "ROW": lambda self: self._parse_row(), 682 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 683 "SAMPLE": lambda self: self.expression( 684 
exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 685 ), 686 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 687 "SETTINGS": lambda self: self.expression( 688 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 689 ), 690 "SORTKEY": lambda self: self._parse_sortkey(), 691 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 692 "STABLE": lambda self: self.expression( 693 exp.StabilityProperty, this=exp.Literal.string("STABLE") 694 ), 695 "STORED": lambda self: self._parse_stored(), 696 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 697 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 698 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 699 "TO": lambda self: self._parse_to_table(), 700 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 701 "TRANSFORM": lambda self: self.expression( 702 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 703 ), 704 "TTL": lambda self: self._parse_ttl(), 705 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 706 "VOLATILE": lambda self: self._parse_volatile_property(), 707 "WITH": lambda self: self._parse_with_property(), 708 } 709 710 CONSTRAINT_PARSERS = { 711 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 712 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 713 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 714 "CHARACTER SET": lambda self: self.expression( 715 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 716 ), 717 "CHECK": lambda self: self.expression( 718 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 719 ), 720 "COLLATE": lambda self: self.expression( 721 exp.CollateColumnConstraint, this=self._parse_var() 722 ), 723 "COMMENT": lambda self: self.expression( 724 exp.CommentColumnConstraint, 
this=self._parse_string() 725 ), 726 "COMPRESS": lambda self: self._parse_compress(), 727 "CLUSTERED": lambda self: self.expression( 728 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 729 ), 730 "NONCLUSTERED": lambda self: self.expression( 731 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 732 ), 733 "DEFAULT": lambda self: self.expression( 734 exp.DefaultColumnConstraint, this=self._parse_bitwise() 735 ), 736 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 737 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 738 "FORMAT": lambda self: self.expression( 739 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 740 ), 741 "GENERATED": lambda self: self._parse_generated_as_identity(), 742 "IDENTITY": lambda self: self._parse_auto_increment(), 743 "INLINE": lambda self: self._parse_inline(), 744 "LIKE": lambda self: self._parse_create_like(), 745 "NOT": lambda self: self._parse_not_constraint(), 746 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 747 "ON": lambda self: ( 748 self._match(TokenType.UPDATE) 749 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 750 ) 751 or self.expression(exp.OnProperty, this=self._parse_id_var()), 752 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 753 "PRIMARY KEY": lambda self: self._parse_primary_key(), 754 "REFERENCES": lambda self: self._parse_references(match=False), 755 "TITLE": lambda self: self.expression( 756 exp.TitleColumnConstraint, this=self._parse_var_or_string() 757 ), 758 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 759 "UNIQUE": lambda self: self._parse_unique(), 760 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 761 "WITH": lambda self: self.expression( 762 exp.Properties, 
expressions=self._parse_wrapped_csv(self._parse_property) 763 ), 764 } 765 766 ALTER_PARSERS = { 767 "ADD": lambda self: self._parse_alter_table_add(), 768 "ALTER": lambda self: self._parse_alter_table_alter(), 769 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 770 "DROP": lambda self: self._parse_alter_table_drop(), 771 "RENAME": lambda self: self._parse_alter_table_rename(), 772 } 773 774 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 775 776 NO_PAREN_FUNCTION_PARSERS = { 777 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 778 "CASE": lambda self: self._parse_case(), 779 "IF": lambda self: self._parse_if(), 780 "NEXT": lambda self: self._parse_next_value_for(), 781 } 782 783 INVALID_FUNC_NAME_TOKENS = { 784 TokenType.IDENTIFIER, 785 TokenType.STRING, 786 } 787 788 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 789 790 FUNCTION_PARSERS = { 791 "ANY_VALUE": lambda self: self._parse_any_value(), 792 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 793 "CONCAT": lambda self: self._parse_concat(), 794 "CONCAT_WS": lambda self: self._parse_concat_ws(), 795 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 796 "DECODE": lambda self: self._parse_decode(), 797 "EXTRACT": lambda self: self._parse_extract(), 798 "JSON_OBJECT": lambda self: self._parse_json_object(), 799 "JSON_TABLE": lambda self: self._parse_json_table(), 800 "LOG": lambda self: self._parse_logarithm(), 801 "MATCH": lambda self: self._parse_match_against(), 802 "OPENJSON": lambda self: self._parse_open_json(), 803 "POSITION": lambda self: self._parse_position(), 804 "PREDICT": lambda self: self._parse_predict(), 805 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 806 "STRING_AGG": lambda self: self._parse_string_agg(), 807 "SUBSTRING": lambda self: self._parse_substring(), 808 "TRIM": lambda self: self._parse_trim(), 809 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 
810 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 811 } 812 813 QUERY_MODIFIER_PARSERS = { 814 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 815 TokenType.WHERE: lambda self: ("where", self._parse_where()), 816 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 817 TokenType.HAVING: lambda self: ("having", self._parse_having()), 818 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 819 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 820 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 821 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 822 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 823 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 824 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 825 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 826 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 827 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 828 TokenType.CLUSTER_BY: lambda self: ( 829 "cluster", 830 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 831 ), 832 TokenType.DISTRIBUTE_BY: lambda self: ( 833 "distribute", 834 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 835 ), 836 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 837 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 838 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 839 } 840 841 SET_PARSERS = { 842 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 843 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 844 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 845 "TRANSACTION": lambda self: self._parse_set_transaction(), 846 } 847 848 SHOW_PARSERS: t.Dict[str, 
t.Callable] = {} 849 850 TYPE_LITERAL_PARSERS = { 851 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 852 } 853 854 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 855 856 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 857 858 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 859 860 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 861 TRANSACTION_CHARACTERISTICS = { 862 "ISOLATION LEVEL REPEATABLE READ", 863 "ISOLATION LEVEL READ COMMITTED", 864 "ISOLATION LEVEL READ UNCOMMITTED", 865 "ISOLATION LEVEL SERIALIZABLE", 866 "READ WRITE", 867 "READ ONLY", 868 } 869 870 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 871 872 CLONE_KEYWORDS = {"CLONE", "COPY"} 873 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 874 875 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 876 877 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 878 879 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 880 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 881 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 882 883 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 884 885 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 886 887 DISTINCT_TOKENS = {TokenType.DISTINCT} 888 889 NULL_TOKENS = {TokenType.NULL} 890 891 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 892 893 STRICT_CAST = True 894 895 # A NULL arg in CONCAT yields NULL by default 896 CONCAT_NULL_OUTPUTS_STRING = False 897 898 PREFIXED_PIVOT_COLUMNS = False 899 IDENTIFY_PIVOT_STRINGS = False 900 901 LOG_BASE_FIRST = True 902 LOG_DEFAULTS_TO_LN = False 903 904 # Whether or not ADD is present for each column added by ALTER TABLE 905 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 906 907 # Whether or not the table sample clause expects CSV syntax 908 TABLESAMPLE_CSV = False 909 910 # Whether or 
    # not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        """Creates a parser with the given error settings; see the class docstring."""
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Clears all per-parse state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this attempt was for, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on top-level semicolons and runs `parse_method` on each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't open a new (empty) statement chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        # Capture up to `error_message_context` characters of SQL on each side of the
        # offending token; the token itself is underlined via ANSI escape codes.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Explicit comments win; otherwise attach any comments buffered from the last token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Moves any buffered token comments onto `expression` and clears the buffer."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL spanning `start` through `end` tokens."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Moves the cursor forward `times` tokens, updating _curr/_next/_prev."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the cursor back to `index` (implemented as a negative advance)."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Wraps an unparsed statement as an opaque Command node (keyword + rest as a string)."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a COMMENT ON <kind> <target> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if
            allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind -- fall back to an opaque Command node.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses the target table of a TO <table> property clause."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause (expressions, WHERE, GROUP BY, SET aggregates)."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each TTL expression may be followed by an action: DELETE, RECOMPRESS,
            # TO DISK '...' or TO VOLUME '...'.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses one statement: a registered statement type, a raw command, or an expression."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses DROP [TEMPORARY|MATERIALIZED] <kind> ...; unknown kinds become a Command."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS; returns a truthy value only when fully matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses CREATE [OR REPLACE] [UNIQUE] <kind> ...; unparseable forms become a Command."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique =
        self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token and treat FUNCTION as the creatable.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Properties can appear at several locations; accumulate them all
            # into a single Properties node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        # Snowflake-style CLONE/COPY clause, optionally pinned to a point in time.
        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a property that appears before the object name (with modifier prefixes)."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords preceding the property name; only truthy
        # ones are forwarded to the property parser below.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The chosen parser doesn't accept the matched modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single property clause, falling back to a generic key = value pair."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key =
        self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a key = value property; rewind so the caller can try something else.
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED [AS] <format>, including Hive's INPUTFORMAT/OUTPUTFORMAT pair."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        """Parses [=|AS] <field> and wraps it in the given property class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parses consecutive properties into one Properties node; None if there are none."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parses [NO] FALLBACK [PROTECTION]."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguates VOLATILE: a table property right after CREATE vs. a UDF stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parses the clause following WITH: a wrapped property list or a specific WITH-property."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parses DEFINER = user@host; None when either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parses WITH JOURNAL [TABLE] [=] <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Wraps a [NO] LOG property."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Wraps a JOURNAL property with whatever modifiers were matched upstream."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses CHECKSUM [=] ON|OFF|DEFAULT."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on,
        default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        """Parses a comma-separated list of ordered expressions into a Cluster node."""
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parses CLUSTERED BY (cols) [SORTED BY (ordered cols)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parses COPY GRANTS; rewinds and returns None when GRANTS doesn't follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parses FREESPACE [=] <number> [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parses MERGEBLOCKRATIO, either = <number> [PERCENT] or with NO/DEFAULT modifiers."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parses DATABLOCKSIZE [=] <number> [BYTES|KBYTES|KILOBYTES] with optional modifiers."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parses BLOCKCOMPRESSION [=] ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP (...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parses [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parses a LOCKING property: target kind, target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW) carry a target name.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

if self._match_text_seq("ACCESS"): 1713 lock_type = "ACCESS" 1714 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1715 lock_type = "EXCLUSIVE" 1716 elif self._match_text_seq("SHARE"): 1717 lock_type = "SHARE" 1718 elif self._match_text_seq("READ"): 1719 lock_type = "READ" 1720 elif self._match_text_seq("WRITE"): 1721 lock_type = "WRITE" 1722 elif self._match_text_seq("CHECKSUM"): 1723 lock_type = "CHECKSUM" 1724 else: 1725 lock_type = None 1726 1727 override = self._match_text_seq("OVERRIDE") 1728 1729 return self.expression( 1730 exp.LockingProperty, 1731 this=this, 1732 kind=kind, 1733 for_or_in=for_or_in, 1734 lock_type=lock_type, 1735 override=override, 1736 ) 1737 1738 def _parse_partition_by(self) -> t.List[exp.Expression]: 1739 if self._match(TokenType.PARTITION_BY): 1740 return self._parse_csv(self._parse_conjunction) 1741 return [] 1742 1743 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1744 self._match(TokenType.EQ) 1745 return self.expression( 1746 exp.PartitionedByProperty, 1747 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1748 ) 1749 1750 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1751 if self._match_text_seq("AND", "STATISTICS"): 1752 statistics = True 1753 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1754 statistics = False 1755 else: 1756 statistics = None 1757 1758 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1759 1760 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1761 if self._match_text_seq("PRIMARY", "INDEX"): 1762 return exp.NoPrimaryIndexProperty() 1763 return None 1764 1765 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1766 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1767 return exp.OnCommitProperty() 1768 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1769 return exp.OnCommitProperty(delete=True) 1770 return self.expression(exp.OnProperty, 
this=self._parse_schema(self._parse_id_var())) 1771 1772 def _parse_distkey(self) -> exp.DistKeyProperty: 1773 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1774 1775 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1776 table = self._parse_table(schema=True) 1777 1778 options = [] 1779 while self._match_texts(("INCLUDING", "EXCLUDING")): 1780 this = self._prev.text.upper() 1781 1782 id_var = self._parse_id_var() 1783 if not id_var: 1784 return None 1785 1786 options.append( 1787 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1788 ) 1789 1790 return self.expression(exp.LikeProperty, this=table, expressions=options) 1791 1792 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1793 return self.expression( 1794 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1795 ) 1796 1797 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1798 self._match(TokenType.EQ) 1799 return self.expression( 1800 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1801 ) 1802 1803 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1804 self._match_text_seq("WITH", "CONNECTION") 1805 return self.expression( 1806 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1807 ) 1808 1809 def _parse_returns(self) -> exp.ReturnsProperty: 1810 value: t.Optional[exp.Expression] 1811 is_table = self._match(TokenType.TABLE) 1812 1813 if is_table: 1814 if self._match(TokenType.LT): 1815 value = self.expression( 1816 exp.Schema, 1817 this="TABLE", 1818 expressions=self._parse_csv(self._parse_struct_types), 1819 ) 1820 if not self._match(TokenType.GT): 1821 self.raise_error("Expecting >") 1822 else: 1823 value = self._parse_schema(exp.var("TABLE")) 1824 else: 1825 value = self._parse_types() 1826 1827 return self.expression(exp.ReturnsProperty, this=value, 
is_table=is_table) 1828 1829 def _parse_describe(self) -> exp.Describe: 1830 kind = self._match_set(self.CREATABLES) and self._prev.text 1831 this = self._parse_table(schema=True) 1832 properties = self._parse_properties() 1833 expressions = properties.expressions if properties else None 1834 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1835 1836 def _parse_insert(self) -> exp.Insert: 1837 comments = ensure_list(self._prev_comments) 1838 overwrite = self._match(TokenType.OVERWRITE) 1839 ignore = self._match(TokenType.IGNORE) 1840 local = self._match_text_seq("LOCAL") 1841 alternative = None 1842 1843 if self._match_text_seq("DIRECTORY"): 1844 this: t.Optional[exp.Expression] = self.expression( 1845 exp.Directory, 1846 this=self._parse_var_or_string(), 1847 local=local, 1848 row_format=self._parse_row_format(match_row=True), 1849 ) 1850 else: 1851 if self._match(TokenType.OR): 1852 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1853 1854 self._match(TokenType.INTO) 1855 comments += ensure_list(self._prev_comments) 1856 self._match(TokenType.TABLE) 1857 this = self._parse_table(schema=True) 1858 1859 returning = self._parse_returning() 1860 1861 return self.expression( 1862 exp.Insert, 1863 comments=comments, 1864 this=this, 1865 by_name=self._match_text_seq("BY", "NAME"), 1866 exists=self._parse_exists(), 1867 partition=self._parse_partition(), 1868 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1869 and self._parse_conjunction(), 1870 expression=self._parse_ddl_select(), 1871 conflict=self._parse_on_conflict(), 1872 returning=returning or self._parse_returning(), 1873 overwrite=overwrite, 1874 alternative=alternative, 1875 ignore=ignore, 1876 ) 1877 1878 def _parse_kill(self) -> exp.Kill: 1879 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1880 1881 return self.expression( 1882 exp.Kill, 1883 this=self._parse_primary(), 1884 kind=kind, 1885 ) 
1886 1887 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1888 conflict = self._match_text_seq("ON", "CONFLICT") 1889 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1890 1891 if not conflict and not duplicate: 1892 return None 1893 1894 nothing = None 1895 expressions = None 1896 key = None 1897 constraint = None 1898 1899 if conflict: 1900 if self._match_text_seq("ON", "CONSTRAINT"): 1901 constraint = self._parse_id_var() 1902 else: 1903 key = self._parse_csv(self._parse_value) 1904 1905 self._match_text_seq("DO") 1906 if self._match_text_seq("NOTHING"): 1907 nothing = True 1908 else: 1909 self._match(TokenType.UPDATE) 1910 self._match(TokenType.SET) 1911 expressions = self._parse_csv(self._parse_equality) 1912 1913 return self.expression( 1914 exp.OnConflict, 1915 duplicate=duplicate, 1916 expressions=expressions, 1917 nothing=nothing, 1918 key=key, 1919 constraint=constraint, 1920 ) 1921 1922 def _parse_returning(self) -> t.Optional[exp.Returning]: 1923 if not self._match(TokenType.RETURNING): 1924 return None 1925 return self.expression( 1926 exp.Returning, 1927 expressions=self._parse_csv(self._parse_expression), 1928 into=self._match(TokenType.INTO) and self._parse_table_part(), 1929 ) 1930 1931 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1932 if not self._match(TokenType.FORMAT): 1933 return None 1934 return self._parse_row_format() 1935 1936 def _parse_row_format( 1937 self, match_row: bool = False 1938 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1939 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1940 return None 1941 1942 if self._match_text_seq("SERDE"): 1943 this = self._parse_string() 1944 1945 serde_properties = None 1946 if self._match(TokenType.SERDE_PROPERTIES): 1947 serde_properties = self.expression( 1948 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1949 ) 1950 1951 return 
self.expression( 1952 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1953 ) 1954 1955 self._match_text_seq("DELIMITED") 1956 1957 kwargs = {} 1958 1959 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1960 kwargs["fields"] = self._parse_string() 1961 if self._match_text_seq("ESCAPED", "BY"): 1962 kwargs["escaped"] = self._parse_string() 1963 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1964 kwargs["collection_items"] = self._parse_string() 1965 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1966 kwargs["map_keys"] = self._parse_string() 1967 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1968 kwargs["lines"] = self._parse_string() 1969 if self._match_text_seq("NULL", "DEFINED", "AS"): 1970 kwargs["null"] = self._parse_string() 1971 1972 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1973 1974 def _parse_load(self) -> exp.LoadData | exp.Command: 1975 if self._match_text_seq("DATA"): 1976 local = self._match_text_seq("LOCAL") 1977 self._match_text_seq("INPATH") 1978 inpath = self._parse_string() 1979 overwrite = self._match(TokenType.OVERWRITE) 1980 self._match_pair(TokenType.INTO, TokenType.TABLE) 1981 1982 return self.expression( 1983 exp.LoadData, 1984 this=self._parse_table(schema=True), 1985 local=local, 1986 overwrite=overwrite, 1987 inpath=inpath, 1988 partition=self._parse_partition(), 1989 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1990 serde=self._match_text_seq("SERDE") and self._parse_string(), 1991 ) 1992 return self._parse_as_command(self._prev) 1993 1994 def _parse_delete(self) -> exp.Delete: 1995 # This handles MySQL's "Multiple-Table Syntax" 1996 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1997 tables = None 1998 comments = self._prev_comments 1999 if not self._match(TokenType.FROM, advance=False): 2000 tables = self._parse_csv(self._parse_table) or None 2001 2002 returning = self._parse_returning() 
2003 2004 return self.expression( 2005 exp.Delete, 2006 comments=comments, 2007 tables=tables, 2008 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2009 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2010 where=self._parse_where(), 2011 returning=returning or self._parse_returning(), 2012 limit=self._parse_limit(), 2013 ) 2014 2015 def _parse_update(self) -> exp.Update: 2016 comments = self._prev_comments 2017 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2018 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2019 returning = self._parse_returning() 2020 return self.expression( 2021 exp.Update, 2022 comments=comments, 2023 **{ # type: ignore 2024 "this": this, 2025 "expressions": expressions, 2026 "from": self._parse_from(joins=True), 2027 "where": self._parse_where(), 2028 "returning": returning or self._parse_returning(), 2029 "order": self._parse_order(), 2030 "limit": self._parse_limit(), 2031 }, 2032 ) 2033 2034 def _parse_uncache(self) -> exp.Uncache: 2035 if not self._match(TokenType.TABLE): 2036 self.raise_error("Expecting TABLE after UNCACHE") 2037 2038 return self.expression( 2039 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2040 ) 2041 2042 def _parse_cache(self) -> exp.Cache: 2043 lazy = self._match_text_seq("LAZY") 2044 self._match(TokenType.TABLE) 2045 table = self._parse_table(schema=True) 2046 2047 options = [] 2048 if self._match_text_seq("OPTIONS"): 2049 self._match_l_paren() 2050 k = self._parse_string() 2051 self._match(TokenType.EQ) 2052 v = self._parse_string() 2053 options = [k, v] 2054 self._match_r_paren() 2055 2056 self._match(TokenType.ALIAS) 2057 return self.expression( 2058 exp.Cache, 2059 this=table, 2060 lazy=lazy, 2061 options=options, 2062 expression=self._parse_select(nested=True), 2063 ) 2064 2065 def _parse_partition(self) -> t.Optional[exp.Partition]: 2066 if not 
self._match(TokenType.PARTITION): 2067 return None 2068 2069 return self.expression( 2070 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2071 ) 2072 2073 def _parse_value(self) -> exp.Tuple: 2074 if self._match(TokenType.L_PAREN): 2075 expressions = self._parse_csv(self._parse_conjunction) 2076 self._match_r_paren() 2077 return self.expression(exp.Tuple, expressions=expressions) 2078 2079 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2080 # https://prestodb.io/docs/current/sql/values.html 2081 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2082 2083 def _parse_projections(self) -> t.List[exp.Expression]: 2084 return self._parse_expressions() 2085 2086 def _parse_select( 2087 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2088 ) -> t.Optional[exp.Expression]: 2089 cte = self._parse_with() 2090 2091 if cte: 2092 this = self._parse_statement() 2093 2094 if not this: 2095 self.raise_error("Failed to parse any statement following CTE") 2096 return cte 2097 2098 if "with" in this.arg_types: 2099 this.set("with", cte) 2100 else: 2101 self.raise_error(f"{this.key} does not support CTE") 2102 this = cte 2103 2104 return this 2105 2106 # duckdb supports leading with FROM x 2107 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2108 2109 if self._match(TokenType.SELECT): 2110 comments = self._prev_comments 2111 2112 hint = self._parse_hint() 2113 all_ = self._match(TokenType.ALL) 2114 distinct = self._match_set(self.DISTINCT_TOKENS) 2115 2116 kind = ( 2117 self._match(TokenType.ALIAS) 2118 and self._match_texts(("STRUCT", "VALUE")) 2119 and self._prev.text 2120 ) 2121 2122 if distinct: 2123 distinct = self.expression( 2124 exp.Distinct, 2125 on=self._parse_value() if self._match(TokenType.ON) else None, 2126 ) 2127 2128 if all_ and distinct: 2129 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2130 2131 
limit = self._parse_limit(top=True) 2132 projections = self._parse_projections() 2133 2134 this = self.expression( 2135 exp.Select, 2136 kind=kind, 2137 hint=hint, 2138 distinct=distinct, 2139 expressions=projections, 2140 limit=limit, 2141 ) 2142 this.comments = comments 2143 2144 into = self._parse_into() 2145 if into: 2146 this.set("into", into) 2147 2148 if not from_: 2149 from_ = self._parse_from() 2150 2151 if from_: 2152 this.set("from", from_) 2153 2154 this = self._parse_query_modifiers(this) 2155 elif (table or nested) and self._match(TokenType.L_PAREN): 2156 if self._match(TokenType.PIVOT): 2157 this = self._parse_simplified_pivot() 2158 elif self._match(TokenType.FROM): 2159 this = exp.select("*").from_( 2160 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2161 ) 2162 else: 2163 this = self._parse_table() if table else self._parse_select(nested=True) 2164 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2165 2166 self._match_r_paren() 2167 2168 # We return early here so that the UNION isn't attached to the subquery by the 2169 # following call to _parse_set_operations, but instead becomes the parent node 2170 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2171 elif self._match(TokenType.VALUES): 2172 this = self.expression( 2173 exp.Values, 2174 expressions=self._parse_csv(self._parse_value), 2175 alias=self._parse_table_alias(), 2176 ) 2177 elif from_: 2178 this = exp.select("*").from_(from_.this, copy=False) 2179 else: 2180 this = None 2181 2182 return self._parse_set_operations(this) 2183 2184 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2185 if not skip_with_token and not self._match(TokenType.WITH): 2186 return None 2187 2188 comments = self._prev_comments 2189 recursive = self._match(TokenType.RECURSIVE) 2190 2191 expressions = [] 2192 while True: 2193 expressions.append(self._parse_cte()) 2194 2195 if not self._match(TokenType.COMMA) and not 
self._match(TokenType.WITH): 2196 break 2197 else: 2198 self._match(TokenType.WITH) 2199 2200 return self.expression( 2201 exp.With, comments=comments, expressions=expressions, recursive=recursive 2202 ) 2203 2204 def _parse_cte(self) -> exp.CTE: 2205 alias = self._parse_table_alias() 2206 if not alias or not alias.this: 2207 self.raise_error("Expected CTE to have alias") 2208 2209 self._match(TokenType.ALIAS) 2210 return self.expression( 2211 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2212 ) 2213 2214 def _parse_table_alias( 2215 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2216 ) -> t.Optional[exp.TableAlias]: 2217 any_token = self._match(TokenType.ALIAS) 2218 alias = ( 2219 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2220 or self._parse_string_as_identifier() 2221 ) 2222 2223 index = self._index 2224 if self._match(TokenType.L_PAREN): 2225 columns = self._parse_csv(self._parse_function_parameter) 2226 self._match_r_paren() if columns else self._retreat(index) 2227 else: 2228 columns = None 2229 2230 if not alias and not columns: 2231 return None 2232 2233 return self.expression(exp.TableAlias, this=alias, columns=columns) 2234 2235 def _parse_subquery( 2236 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2237 ) -> t.Optional[exp.Subquery]: 2238 if not this: 2239 return None 2240 2241 return self.expression( 2242 exp.Subquery, 2243 this=this, 2244 pivots=self._parse_pivots(), 2245 alias=self._parse_table_alias() if parse_alias else None, 2246 ) 2247 2248 def _parse_query_modifiers( 2249 self, this: t.Optional[exp.Expression] 2250 ) -> t.Optional[exp.Expression]: 2251 if isinstance(this, self.MODIFIABLES): 2252 for join in iter(self._parse_join, None): 2253 this.append("joins", join) 2254 for lateral in iter(self._parse_lateral, None): 2255 this.append("laterals", lateral) 2256 2257 while True: 2258 if self._match_set(self.QUERY_MODIFIER_PARSERS, 
advance=False): 2259 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2260 key, expression = parser(self) 2261 2262 if expression: 2263 this.set(key, expression) 2264 if key == "limit": 2265 offset = expression.args.pop("offset", None) 2266 if offset: 2267 this.set("offset", exp.Offset(expression=offset)) 2268 continue 2269 break 2270 return this 2271 2272 def _parse_hint(self) -> t.Optional[exp.Hint]: 2273 if self._match(TokenType.HINT): 2274 hints = [] 2275 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2276 hints.extend(hint) 2277 2278 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2279 self.raise_error("Expected */ after HINT") 2280 2281 return self.expression(exp.Hint, expressions=hints) 2282 2283 return None 2284 2285 def _parse_into(self) -> t.Optional[exp.Into]: 2286 if not self._match(TokenType.INTO): 2287 return None 2288 2289 temp = self._match(TokenType.TEMPORARY) 2290 unlogged = self._match_text_seq("UNLOGGED") 2291 self._match(TokenType.TABLE) 2292 2293 return self.expression( 2294 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2295 ) 2296 2297 def _parse_from( 2298 self, joins: bool = False, skip_from_token: bool = False 2299 ) -> t.Optional[exp.From]: 2300 if not skip_from_token and not self._match(TokenType.FROM): 2301 return None 2302 2303 return self.expression( 2304 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2305 ) 2306 2307 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2308 if not self._match(TokenType.MATCH_RECOGNIZE): 2309 return None 2310 2311 self._match_l_paren() 2312 2313 partition = self._parse_partition_by() 2314 order = self._parse_order() 2315 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2316 2317 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2318 rows = exp.var("ONE ROW PER MATCH") 2319 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2320 text = 
"ALL ROWS PER MATCH" 2321 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2322 text += f" SHOW EMPTY MATCHES" 2323 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2324 text += f" OMIT EMPTY MATCHES" 2325 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2326 text += f" WITH UNMATCHED ROWS" 2327 rows = exp.var(text) 2328 else: 2329 rows = None 2330 2331 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2332 text = "AFTER MATCH SKIP" 2333 if self._match_text_seq("PAST", "LAST", "ROW"): 2334 text += f" PAST LAST ROW" 2335 elif self._match_text_seq("TO", "NEXT", "ROW"): 2336 text += f" TO NEXT ROW" 2337 elif self._match_text_seq("TO", "FIRST"): 2338 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2339 elif self._match_text_seq("TO", "LAST"): 2340 text += f" TO LAST {self._advance_any().text}" # type: ignore 2341 after = exp.var(text) 2342 else: 2343 after = None 2344 2345 if self._match_text_seq("PATTERN"): 2346 self._match_l_paren() 2347 2348 if not self._curr: 2349 self.raise_error("Expecting )", self._curr) 2350 2351 paren = 1 2352 start = self._curr 2353 2354 while self._curr and paren > 0: 2355 if self._curr.token_type == TokenType.L_PAREN: 2356 paren += 1 2357 if self._curr.token_type == TokenType.R_PAREN: 2358 paren -= 1 2359 2360 end = self._prev 2361 self._advance() 2362 2363 if paren > 0: 2364 self.raise_error("Expecting )", self._curr) 2365 2366 pattern = exp.var(self._find_sql(start, end)) 2367 else: 2368 pattern = None 2369 2370 define = ( 2371 self._parse_csv( 2372 lambda: self.expression( 2373 exp.Alias, 2374 alias=self._parse_id_var(any_token=True), 2375 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2376 ) 2377 ) 2378 if self._match_text_seq("DEFINE") 2379 else None 2380 ) 2381 2382 self._match_r_paren() 2383 2384 return self.expression( 2385 exp.MatchRecognize, 2386 partition_by=partition, 2387 order=order, 2388 measures=measures, 2389 rows=rows, 2390 after=after, 2391 pattern=pattern, 2392 
define=define, 2393 alias=self._parse_table_alias(), 2394 ) 2395 2396 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2397 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2398 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2399 2400 if outer_apply or cross_apply: 2401 this = self._parse_select(table=True) 2402 view = None 2403 outer = not cross_apply 2404 elif self._match(TokenType.LATERAL): 2405 this = self._parse_select(table=True) 2406 view = self._match(TokenType.VIEW) 2407 outer = self._match(TokenType.OUTER) 2408 else: 2409 return None 2410 2411 if not this: 2412 this = ( 2413 self._parse_unnest() 2414 or self._parse_function() 2415 or self._parse_id_var(any_token=False) 2416 ) 2417 2418 while self._match(TokenType.DOT): 2419 this = exp.Dot( 2420 this=this, 2421 expression=self._parse_function() or self._parse_id_var(any_token=False), 2422 ) 2423 2424 if view: 2425 table = self._parse_id_var(any_token=False) 2426 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2427 table_alias: t.Optional[exp.TableAlias] = self.expression( 2428 exp.TableAlias, this=table, columns=columns 2429 ) 2430 elif isinstance(this, exp.Subquery) and this.alias: 2431 # Ensures parity between the Subquery's and the Lateral's "alias" args 2432 table_alias = this.args["alias"].copy() 2433 else: 2434 table_alias = self._parse_table_alias() 2435 2436 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2437 2438 def _parse_join_parts( 2439 self, 2440 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2441 return ( 2442 self._match_set(self.JOIN_METHODS) and self._prev, 2443 self._match_set(self.JOIN_SIDES) and self._prev, 2444 self._match_set(self.JOIN_KINDS) and self._prev, 2445 ) 2446 2447 def _parse_join( 2448 self, skip_join_token: bool = False, parse_bracket: bool = False 2449 ) -> t.Optional[exp.Join]: 2450 if self._match(TokenType.COMMA): 2451 return 
self.expression(exp.Join, this=self._parse_table()) 2452 2453 index = self._index 2454 method, side, kind = self._parse_join_parts() 2455 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2456 join = self._match(TokenType.JOIN) 2457 2458 if not skip_join_token and not join: 2459 self._retreat(index) 2460 kind = None 2461 method = None 2462 side = None 2463 2464 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2465 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2466 2467 if not skip_join_token and not join and not outer_apply and not cross_apply: 2468 return None 2469 2470 if outer_apply: 2471 side = Token(TokenType.LEFT, "LEFT") 2472 2473 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2474 2475 if method: 2476 kwargs["method"] = method.text 2477 if side: 2478 kwargs["side"] = side.text 2479 if kind: 2480 kwargs["kind"] = kind.text 2481 if hint: 2482 kwargs["hint"] = hint 2483 2484 if self._match(TokenType.ON): 2485 kwargs["on"] = self._parse_conjunction() 2486 elif self._match(TokenType.USING): 2487 kwargs["using"] = self._parse_wrapped_id_vars() 2488 elif not (kind and kind.token_type == TokenType.CROSS): 2489 index = self._index 2490 join = self._parse_join() 2491 2492 if join and self._match(TokenType.ON): 2493 kwargs["on"] = self._parse_conjunction() 2494 elif join and self._match(TokenType.USING): 2495 kwargs["using"] = self._parse_wrapped_id_vars() 2496 else: 2497 join = None 2498 self._retreat(index) 2499 2500 kwargs["this"].set("joins", [join] if join else None) 2501 2502 comments = [c for token in (method, side, kind) if token for c in token.comments] 2503 return self.expression(exp.Join, comments=comments, **kwargs) 2504 2505 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2506 this = self._parse_conjunction() 2507 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2508 return this 2509 2510 opclass = 
self._parse_var(any_token=True) 2511 if opclass: 2512 return self.expression(exp.Opclass, this=this, expression=opclass) 2513 2514 return this 2515 2516 def _parse_index( 2517 self, 2518 index: t.Optional[exp.Expression] = None, 2519 ) -> t.Optional[exp.Index]: 2520 if index: 2521 unique = None 2522 primary = None 2523 amp = None 2524 2525 self._match(TokenType.ON) 2526 self._match(TokenType.TABLE) # hive 2527 table = self._parse_table_parts(schema=True) 2528 else: 2529 unique = self._match(TokenType.UNIQUE) 2530 primary = self._match_text_seq("PRIMARY") 2531 amp = self._match_text_seq("AMP") 2532 2533 if not self._match(TokenType.INDEX): 2534 return None 2535 2536 index = self._parse_id_var() 2537 table = None 2538 2539 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2540 2541 if self._match(TokenType.L_PAREN, advance=False): 2542 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2543 else: 2544 columns = None 2545 2546 return self.expression( 2547 exp.Index, 2548 this=index, 2549 table=table, 2550 using=using, 2551 columns=columns, 2552 unique=unique, 2553 primary=primary, 2554 amp=amp, 2555 partition_by=self._parse_partition_by(), 2556 where=self._parse_where(), 2557 ) 2558 2559 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2560 hints: t.List[exp.Expression] = [] 2561 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2562 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2563 hints.append( 2564 self.expression( 2565 exp.WithTableHint, 2566 expressions=self._parse_csv( 2567 lambda: self._parse_function() or self._parse_var(any_token=True) 2568 ), 2569 ) 2570 ) 2571 self._match_r_paren() 2572 else: 2573 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2574 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2575 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2576 2577 self._match_texts({"INDEX", 
"KEY"}) 2578 if self._match(TokenType.FOR): 2579 hint.set("target", self._advance_any() and self._prev.text.upper()) 2580 2581 hint.set("expressions", self._parse_wrapped_id_vars()) 2582 hints.append(hint) 2583 2584 return hints or None 2585 2586 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2587 return ( 2588 (not schema and self._parse_function(optional_parens=False)) 2589 or self._parse_id_var(any_token=False) 2590 or self._parse_string_as_identifier() 2591 or self._parse_placeholder() 2592 ) 2593 2594 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2595 catalog = None 2596 db = None 2597 table = self._parse_table_part(schema=schema) 2598 2599 while self._match(TokenType.DOT): 2600 if catalog: 2601 # This allows nesting the table in arbitrarily many dot expressions if needed 2602 table = self.expression( 2603 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2604 ) 2605 else: 2606 catalog = db 2607 db = table 2608 table = self._parse_table_part(schema=schema) 2609 2610 if not table: 2611 self.raise_error(f"Expected table name but got {self._curr}") 2612 2613 return self.expression( 2614 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2615 ) 2616 2617 def _parse_table( 2618 self, 2619 schema: bool = False, 2620 joins: bool = False, 2621 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2622 parse_bracket: bool = False, 2623 ) -> t.Optional[exp.Expression]: 2624 lateral = self._parse_lateral() 2625 if lateral: 2626 return lateral 2627 2628 unnest = self._parse_unnest() 2629 if unnest: 2630 return unnest 2631 2632 values = self._parse_derived_table_values() 2633 if values: 2634 return values 2635 2636 subquery = self._parse_select(table=True) 2637 if subquery: 2638 if not subquery.args.get("pivots"): 2639 subquery.set("pivots", self._parse_pivots()) 2640 return subquery 2641 2642 bracket = parse_bracket and self._parse_bracket(None) 2643 bracket = 
self.expression(exp.Table, this=bracket) if bracket else None 2644 this = t.cast( 2645 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2646 ) 2647 2648 if schema: 2649 return self._parse_schema(this=this) 2650 2651 version = self._parse_version() 2652 2653 if version: 2654 this.set("version", version) 2655 2656 if self.ALIAS_POST_TABLESAMPLE: 2657 table_sample = self._parse_table_sample() 2658 2659 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2660 if alias: 2661 this.set("alias", alias) 2662 2663 if self._match_text_seq("AT"): 2664 this.set("index", self._parse_id_var()) 2665 2666 this.set("hints", self._parse_table_hints()) 2667 2668 if not this.args.get("pivots"): 2669 this.set("pivots", self._parse_pivots()) 2670 2671 if not self.ALIAS_POST_TABLESAMPLE: 2672 table_sample = self._parse_table_sample() 2673 2674 if table_sample: 2675 table_sample.set("this", this) 2676 this = table_sample 2677 2678 if joins: 2679 for join in iter(self._parse_join, None): 2680 this.append("joins", join) 2681 2682 return this 2683 2684 def _parse_version(self) -> t.Optional[exp.Version]: 2685 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2686 this = "TIMESTAMP" 2687 elif self._match(TokenType.VERSION_SNAPSHOT): 2688 this = "VERSION" 2689 else: 2690 return None 2691 2692 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2693 kind = self._prev.text.upper() 2694 start = self._parse_bitwise() 2695 self._match_texts(("TO", "AND")) 2696 end = self._parse_bitwise() 2697 expression: t.Optional[exp.Expression] = self.expression( 2698 exp.Tuple, expressions=[start, end] 2699 ) 2700 elif self._match_text_seq("CONTAINED", "IN"): 2701 kind = "CONTAINED IN" 2702 expression = self.expression( 2703 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2704 ) 2705 elif self._match(TokenType.ALL): 2706 kind = "ALL" 2707 expression = None 2708 else: 2709 self._match_text_seq("AS", "OF") 2710 kind = "AS 
OF" 2711 expression = self._parse_type() 2712 2713 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2714 2715 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2716 if not self._match(TokenType.UNNEST): 2717 return None 2718 2719 expressions = self._parse_wrapped_csv(self._parse_type) 2720 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2721 2722 alias = self._parse_table_alias() if with_alias else None 2723 2724 if alias: 2725 if self.UNNEST_COLUMN_ONLY: 2726 if alias.args.get("columns"): 2727 self.raise_error("Unexpected extra column alias in unnest.") 2728 2729 alias.set("columns", [alias.this]) 2730 alias.set("this", None) 2731 2732 columns = alias.args.get("columns") or [] 2733 if offset and len(expressions) < len(columns): 2734 offset = columns.pop() 2735 2736 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2737 self._match(TokenType.ALIAS) 2738 offset = self._parse_id_var( 2739 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2740 ) or exp.to_identifier("offset") 2741 2742 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2743 2744 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2745 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2746 if not is_derived and not self._match(TokenType.VALUES): 2747 return None 2748 2749 expressions = self._parse_csv(self._parse_value) 2750 alias = self._parse_table_alias() 2751 2752 if is_derived: 2753 self._match_r_paren() 2754 2755 return self.expression( 2756 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2757 ) 2758 2759 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2760 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2761 as_modifier and self._match_text_seq("USING", "SAMPLE") 2762 ): 2763 return None 2764 2765 bucket_numerator = None 2766 bucket_denominator = None 2767 
bucket_field = None 2768 percent = None 2769 rows = None 2770 size = None 2771 seed = None 2772 2773 kind = ( 2774 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2775 ) 2776 method = self._parse_var(tokens=(TokenType.ROW,)) 2777 2778 matched_l_paren = self._match(TokenType.L_PAREN) 2779 2780 if self.TABLESAMPLE_CSV: 2781 num = None 2782 expressions = self._parse_csv(self._parse_primary) 2783 else: 2784 expressions = None 2785 num = ( 2786 self._parse_factor() 2787 if self._match(TokenType.NUMBER, advance=False) 2788 else self._parse_primary() 2789 ) 2790 2791 if self._match_text_seq("BUCKET"): 2792 bucket_numerator = self._parse_number() 2793 self._match_text_seq("OUT", "OF") 2794 bucket_denominator = bucket_denominator = self._parse_number() 2795 self._match(TokenType.ON) 2796 bucket_field = self._parse_field() 2797 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2798 percent = num 2799 elif self._match(TokenType.ROWS): 2800 rows = num 2801 elif num: 2802 size = num 2803 2804 if matched_l_paren: 2805 self._match_r_paren() 2806 2807 if self._match(TokenType.L_PAREN): 2808 method = self._parse_var() 2809 seed = self._match(TokenType.COMMA) and self._parse_number() 2810 self._match_r_paren() 2811 elif self._match_texts(("SEED", "REPEATABLE")): 2812 seed = self._parse_wrapped(self._parse_number) 2813 2814 return self.expression( 2815 exp.TableSample, 2816 expressions=expressions, 2817 method=method, 2818 bucket_numerator=bucket_numerator, 2819 bucket_denominator=bucket_denominator, 2820 bucket_field=bucket_field, 2821 percent=percent, 2822 rows=rows, 2823 size=size, 2824 seed=seed, 2825 kind=kind, 2826 ) 2827 2828 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2829 return list(iter(self._parse_pivot, None)) or None 2830 2831 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2832 return list(iter(self._parse_join, None)) or None 2833 2834 # https://duckdb.org/docs/sql/statements/pivot 2835 def 
_parse_simplified_pivot(self) -> exp.Pivot: 2836 def _parse_on() -> t.Optional[exp.Expression]: 2837 this = self._parse_bitwise() 2838 return self._parse_in(this) if self._match(TokenType.IN) else this 2839 2840 this = self._parse_table() 2841 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2842 using = self._match(TokenType.USING) and self._parse_csv( 2843 lambda: self._parse_alias(self._parse_function()) 2844 ) 2845 group = self._parse_group() 2846 return self.expression( 2847 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2848 ) 2849 2850 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2851 index = self._index 2852 include_nulls = None 2853 2854 if self._match(TokenType.PIVOT): 2855 unpivot = False 2856 elif self._match(TokenType.UNPIVOT): 2857 unpivot = True 2858 2859 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2860 if self._match_text_seq("INCLUDE", "NULLS"): 2861 include_nulls = True 2862 elif self._match_text_seq("EXCLUDE", "NULLS"): 2863 include_nulls = False 2864 else: 2865 return None 2866 2867 expressions = [] 2868 field = None 2869 2870 if not self._match(TokenType.L_PAREN): 2871 self._retreat(index) 2872 return None 2873 2874 if unpivot: 2875 expressions = self._parse_csv(self._parse_column) 2876 else: 2877 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2878 2879 if not expressions: 2880 self.raise_error("Failed to parse PIVOT's aggregation list") 2881 2882 if not self._match(TokenType.FOR): 2883 self.raise_error("Expecting FOR") 2884 2885 value = self._parse_column() 2886 2887 if not self._match(TokenType.IN): 2888 self.raise_error("Expecting IN") 2889 2890 field = self._parse_in(value, alias=True) 2891 2892 self._match_r_paren() 2893 2894 pivot = self.expression( 2895 exp.Pivot, 2896 expressions=expressions, 2897 field=field, 2898 unpivot=unpivot, 2899 include_nulls=include_nulls, 2900 ) 2901 2902 if not 
self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2903 pivot.set("alias", self._parse_table_alias()) 2904 2905 if not unpivot: 2906 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2907 2908 columns: t.List[exp.Expression] = [] 2909 for fld in pivot.args["field"].expressions: 2910 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2911 for name in names: 2912 if self.PREFIXED_PIVOT_COLUMNS: 2913 name = f"{name}_{field_name}" if name else field_name 2914 else: 2915 name = f"{field_name}_{name}" if name else field_name 2916 2917 columns.append(exp.to_identifier(name)) 2918 2919 pivot.set("columns", columns) 2920 2921 return pivot 2922 2923 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2924 return [agg.alias for agg in aggregations] 2925 2926 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2927 if not skip_where_token and not self._match(TokenType.WHERE): 2928 return None 2929 2930 return self.expression( 2931 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2932 ) 2933 2934 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2935 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2936 return None 2937 2938 elements = defaultdict(list) 2939 2940 if self._match(TokenType.ALL): 2941 return self.expression(exp.Group, all=True) 2942 2943 while True: 2944 expressions = self._parse_csv(self._parse_conjunction) 2945 if expressions: 2946 elements["expressions"].extend(expressions) 2947 2948 grouping_sets = self._parse_grouping_sets() 2949 if grouping_sets: 2950 elements["grouping_sets"].extend(grouping_sets) 2951 2952 rollup = None 2953 cube = None 2954 totals = None 2955 2956 with_ = self._match(TokenType.WITH) 2957 if self._match(TokenType.ROLLUP): 2958 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2959 
elements["rollup"].extend(ensure_list(rollup)) 2960 2961 if self._match(TokenType.CUBE): 2962 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2963 elements["cube"].extend(ensure_list(cube)) 2964 2965 if self._match_text_seq("TOTALS"): 2966 totals = True 2967 elements["totals"] = True # type: ignore 2968 2969 if not (grouping_sets or rollup or cube or totals): 2970 break 2971 2972 return self.expression(exp.Group, **elements) # type: ignore 2973 2974 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2975 if not self._match(TokenType.GROUPING_SETS): 2976 return None 2977 2978 return self._parse_wrapped_csv(self._parse_grouping_set) 2979 2980 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2981 if self._match(TokenType.L_PAREN): 2982 grouping_set = self._parse_csv(self._parse_column) 2983 self._match_r_paren() 2984 return self.expression(exp.Tuple, expressions=grouping_set) 2985 2986 return self._parse_column() 2987 2988 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2989 if not skip_having_token and not self._match(TokenType.HAVING): 2990 return None 2991 return self.expression(exp.Having, this=self._parse_conjunction()) 2992 2993 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2994 if not self._match(TokenType.QUALIFY): 2995 return None 2996 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2997 2998 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2999 if skip_start_token: 3000 start = None 3001 elif self._match(TokenType.START_WITH): 3002 start = self._parse_conjunction() 3003 else: 3004 return None 3005 3006 self._match(TokenType.CONNECT_BY) 3007 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3008 exp.Prior, this=self._parse_bitwise() 3009 ) 3010 connect = self._parse_conjunction() 3011 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3012 3013 if not start and self._match(TokenType.START_WITH): 3014 
start = self._parse_conjunction() 3015 3016 return self.expression(exp.Connect, start=start, connect=connect) 3017 3018 def _parse_order( 3019 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3020 ) -> t.Optional[exp.Expression]: 3021 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3022 return this 3023 3024 return self.expression( 3025 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3026 ) 3027 3028 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3029 if not self._match(token): 3030 return None 3031 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3032 3033 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3034 this = parse_method() if parse_method else self._parse_conjunction() 3035 3036 asc = self._match(TokenType.ASC) 3037 desc = self._match(TokenType.DESC) or (asc and False) 3038 3039 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3040 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3041 3042 nulls_first = is_nulls_first or False 3043 explicitly_null_ordered = is_nulls_first or is_nulls_last 3044 3045 if ( 3046 not explicitly_null_ordered 3047 and ( 3048 (not desc and self.NULL_ORDERING == "nulls_are_small") 3049 or (desc and self.NULL_ORDERING != "nulls_are_small") 3050 ) 3051 and self.NULL_ORDERING != "nulls_are_last" 3052 ): 3053 nulls_first = True 3054 3055 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3056 3057 def _parse_limit( 3058 self, this: t.Optional[exp.Expression] = None, top: bool = False 3059 ) -> t.Optional[exp.Expression]: 3060 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3061 comments = self._prev_comments 3062 if top: 3063 limit_paren = self._match(TokenType.L_PAREN) 3064 expression = self._parse_number() 3065 3066 if limit_paren: 3067 self._match_r_paren() 3068 else: 3069 expression = self._parse_term() 
3070 3071 if self._match(TokenType.COMMA): 3072 offset = expression 3073 expression = self._parse_term() 3074 else: 3075 offset = None 3076 3077 limit_exp = self.expression( 3078 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3079 ) 3080 3081 return limit_exp 3082 3083 if self._match(TokenType.FETCH): 3084 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3085 direction = self._prev.text if direction else "FIRST" 3086 3087 count = self._parse_field(tokens=self.FETCH_TOKENS) 3088 percent = self._match(TokenType.PERCENT) 3089 3090 self._match_set((TokenType.ROW, TokenType.ROWS)) 3091 3092 only = self._match_text_seq("ONLY") 3093 with_ties = self._match_text_seq("WITH", "TIES") 3094 3095 if only and with_ties: 3096 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3097 3098 return self.expression( 3099 exp.Fetch, 3100 direction=direction, 3101 count=count, 3102 percent=percent, 3103 with_ties=with_ties, 3104 ) 3105 3106 return this 3107 3108 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3109 if not self._match(TokenType.OFFSET): 3110 return this 3111 3112 count = self._parse_term() 3113 self._match_set((TokenType.ROW, TokenType.ROWS)) 3114 return self.expression(exp.Offset, this=this, expression=count) 3115 3116 def _parse_locks(self) -> t.List[exp.Lock]: 3117 locks = [] 3118 while True: 3119 if self._match_text_seq("FOR", "UPDATE"): 3120 update = True 3121 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3122 "LOCK", "IN", "SHARE", "MODE" 3123 ): 3124 update = False 3125 else: 3126 break 3127 3128 expressions = None 3129 if self._match_text_seq("OF"): 3130 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3131 3132 wait: t.Optional[bool | exp.Expression] = None 3133 if self._match_text_seq("NOWAIT"): 3134 wait = True 3135 elif self._match_text_seq("WAIT"): 3136 wait = self._parse_primary() 3137 elif 
self._match_text_seq("SKIP", "LOCKED"): 3138 wait = False 3139 3140 locks.append( 3141 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3142 ) 3143 3144 return locks 3145 3146 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3147 if not self._match_set(self.SET_OPERATIONS): 3148 return this 3149 3150 token_type = self._prev.token_type 3151 3152 if token_type == TokenType.UNION: 3153 expression = exp.Union 3154 elif token_type == TokenType.EXCEPT: 3155 expression = exp.Except 3156 else: 3157 expression = exp.Intersect 3158 3159 return self.expression( 3160 expression, 3161 this=this, 3162 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3163 by_name=self._match_text_seq("BY", "NAME"), 3164 expression=self._parse_set_operations(self._parse_select(nested=True)), 3165 ) 3166 3167 def _parse_expression(self) -> t.Optional[exp.Expression]: 3168 return self._parse_alias(self._parse_conjunction()) 3169 3170 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3171 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3172 3173 def _parse_equality(self) -> t.Optional[exp.Expression]: 3174 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3175 3176 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3177 return self._parse_tokens(self._parse_range, self.COMPARISON) 3178 3179 def _parse_range(self) -> t.Optional[exp.Expression]: 3180 this = self._parse_bitwise() 3181 negate = self._match(TokenType.NOT) 3182 3183 if self._match_set(self.RANGE_PARSERS): 3184 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3185 if not expression: 3186 return this 3187 3188 this = expression 3189 elif self._match(TokenType.ISNULL): 3190 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3191 3192 # Postgres supports ISNULL and NOTNULL for conditions. 
3193 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3194 if self._match(TokenType.NOTNULL): 3195 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3196 this = self.expression(exp.Not, this=this) 3197 3198 if negate: 3199 this = self.expression(exp.Not, this=this) 3200 3201 if self._match(TokenType.IS): 3202 this = self._parse_is(this) 3203 3204 return this 3205 3206 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3207 index = self._index - 1 3208 negate = self._match(TokenType.NOT) 3209 3210 if self._match_text_seq("DISTINCT", "FROM"): 3211 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3212 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3213 3214 expression = self._parse_null() or self._parse_boolean() 3215 if not expression: 3216 self._retreat(index) 3217 return None 3218 3219 this = self.expression(exp.Is, this=this, expression=expression) 3220 return self.expression(exp.Not, this=this) if negate else this 3221 3222 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3223 unnest = self._parse_unnest(with_alias=False) 3224 if unnest: 3225 this = self.expression(exp.In, this=this, unnest=unnest) 3226 elif self._match(TokenType.L_PAREN): 3227 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3228 3229 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3230 this = self.expression(exp.In, this=this, query=expressions[0]) 3231 else: 3232 this = self.expression(exp.In, this=this, expressions=expressions) 3233 3234 self._match_r_paren(this) 3235 else: 3236 this = self.expression(exp.In, this=this, field=self._parse_field()) 3237 3238 return this 3239 3240 def _parse_between(self, this: exp.Expression) -> exp.Between: 3241 low = self._parse_bitwise() 3242 self._match(TokenType.AND) 3243 high = self._parse_bitwise() 3244 return self.expression(exp.Between, this=this, low=low, 
high=high) 3245 3246 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3247 if not self._match(TokenType.ESCAPE): 3248 return this 3249 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3250 3251 def _parse_interval(self) -> t.Optional[exp.Interval]: 3252 index = self._index 3253 3254 if not self._match(TokenType.INTERVAL): 3255 return None 3256 3257 if self._match(TokenType.STRING, advance=False): 3258 this = self._parse_primary() 3259 else: 3260 this = self._parse_term() 3261 3262 if not this: 3263 self._retreat(index) 3264 return None 3265 3266 unit = self._parse_function() or self._parse_var(any_token=True) 3267 3268 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3269 # each INTERVAL expression into this canonical form so it's easy to transpile 3270 if this and this.is_number: 3271 this = exp.Literal.string(this.name) 3272 elif this and this.is_string: 3273 parts = this.name.split() 3274 3275 if len(parts) == 2: 3276 if unit: 3277 # This is not actually a unit, it's something else (e.g. 
a "window side") 3278 unit = None 3279 self._retreat(self._index - 1) 3280 3281 this = exp.Literal.string(parts[0]) 3282 unit = self.expression(exp.Var, this=parts[1]) 3283 3284 return self.expression(exp.Interval, this=this, unit=unit) 3285 3286 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3287 this = self._parse_term() 3288 3289 while True: 3290 if self._match_set(self.BITWISE): 3291 this = self.expression( 3292 self.BITWISE[self._prev.token_type], 3293 this=this, 3294 expression=self._parse_term(), 3295 ) 3296 elif self._match(TokenType.DQMARK): 3297 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3298 elif self._match_pair(TokenType.LT, TokenType.LT): 3299 this = self.expression( 3300 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3301 ) 3302 elif self._match_pair(TokenType.GT, TokenType.GT): 3303 this = self.expression( 3304 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3305 ) 3306 else: 3307 break 3308 3309 return this 3310 3311 def _parse_term(self) -> t.Optional[exp.Expression]: 3312 return self._parse_tokens(self._parse_factor, self.TERM) 3313 3314 def _parse_factor(self) -> t.Optional[exp.Expression]: 3315 return self._parse_tokens(self._parse_unary, self.FACTOR) 3316 3317 def _parse_unary(self) -> t.Optional[exp.Expression]: 3318 if self._match_set(self.UNARY_PARSERS): 3319 return self.UNARY_PARSERS[self._prev.token_type](self) 3320 return self._parse_at_time_zone(self._parse_type()) 3321 3322 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3323 interval = parse_interval and self._parse_interval() 3324 if interval: 3325 return interval 3326 3327 index = self._index 3328 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3329 this = self._parse_column() 3330 3331 if data_type: 3332 if isinstance(this, exp.Literal): 3333 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3334 if parser: 3335 return parser(self, this, 
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, returning an exp.DataType (or related node) or None.

        Args:
            check_func: if True, reject a parenthesized type that is immediately
                followed by a string literal — it is likely a function call.
            schema: whether we are parsing inside a schema/column-def context.
            allow_identifiers: whether a bare identifier may be re-tokenized and
                interpreted as a type name (or a user-defined type).
        """
        index = self._index

        # Teradata SYSUDTLIB.<type> prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier text: quoting may have hidden a
                # genuine type keyword (e.g. "INT").
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # Consume a dotted UDT name (db.schema.type).
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            # Parenthesized type arguments: struct fields, nested types, enum
            # values, or plain size parameters depending on the type.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a well-formed type argument list: back out completely.
                self._retreat(index)
                return None

            # A parenthesized form might still turn out to be a function call.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            # WITH/WITHOUT TIME ZONE variants; once seen, this can no longer
            # be a function call.
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # INTERVAL <unit> TO <unit> span type.
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # `TYPE(...) 'string'` is a function call, not a type; peek and
            # bail out if a string literal follows.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
expression=self._parse_var())] 3457 else: 3458 span = None 3459 3460 if span or not unit: 3461 this = self.expression( 3462 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3463 ) 3464 else: 3465 this = self.expression(exp.Interval, unit=unit) 3466 3467 if maybe_func and check_func: 3468 index2 = self._index 3469 peek = self._parse_string() 3470 3471 if not peek: 3472 self._retreat(index) 3473 return None 3474 3475 self._retreat(index2) 3476 3477 if not this: 3478 if self._match_text_seq("UNSIGNED"): 3479 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3480 if not unsigned_type_token: 3481 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3482 3483 type_token = unsigned_type_token or type_token 3484 3485 this = exp.DataType( 3486 this=exp.DataType.Type[type_token.value], 3487 expressions=expressions, 3488 nested=nested, 3489 values=values, 3490 prefix=prefix, 3491 ) 3492 3493 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3494 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3495 3496 return this 3497 3498 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3499 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3500 self._match(TokenType.COLON) 3501 return self._parse_column_def(this) 3502 3503 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3504 if not self._match_text_seq("AT", "TIME", "ZONE"): 3505 return this 3506 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3507 3508 def _parse_column(self) -> t.Optional[exp.Expression]: 3509 this = self._parse_field() 3510 if isinstance(this, exp.Identifier): 3511 this = self.expression(exp.Column, this=this) 3512 elif not this: 3513 return self._parse_bracket(this) 3514 return self._parse_column_ops(this) 3515 3516 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3517 
this = self._parse_bracket(this) 3518 3519 while self._match_set(self.COLUMN_OPERATORS): 3520 op_token = self._prev.token_type 3521 op = self.COLUMN_OPERATORS.get(op_token) 3522 3523 if op_token == TokenType.DCOLON: 3524 field = self._parse_types() 3525 if not field: 3526 self.raise_error("Expected type") 3527 elif op and self._curr: 3528 self._advance() 3529 value = self._prev.text 3530 field = ( 3531 exp.Literal.number(value) 3532 if self._prev.token_type == TokenType.NUMBER 3533 else exp.Literal.string(value) 3534 ) 3535 else: 3536 field = self._parse_field(anonymous_func=True, any_token=True) 3537 3538 if isinstance(field, exp.Func): 3539 # bigquery allows function calls like x.y.count(...) 3540 # SAFE.SUBSTR(...) 3541 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3542 this = self._replace_columns_with_dots(this) 3543 3544 if op: 3545 this = op(self, this, field) 3546 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3547 this = self.expression( 3548 exp.Column, 3549 this=field, 3550 table=this.this, 3551 db=this.args.get("table"), 3552 catalog=this.args.get("db"), 3553 ) 3554 else: 3555 this = self.expression(exp.Dot, this=this, expression=field) 3556 this = self._parse_bracket(this) 3557 return this 3558 3559 def _parse_primary(self) -> t.Optional[exp.Expression]: 3560 if self._match_set(self.PRIMARY_PARSERS): 3561 token_type = self._prev.token_type 3562 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3563 3564 if token_type == TokenType.STRING: 3565 expressions = [primary] 3566 while self._match(TokenType.STRING): 3567 expressions.append(exp.Literal.string(self._prev.text)) 3568 3569 if len(expressions) > 1: 3570 return self.expression(exp.Concat, expressions=expressions) 3571 3572 return primary 3573 3574 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3575 return exp.Literal.number(f"0.{self._prev.text}") 3576 3577 if self._match(TokenType.L_PAREN): 3578 comments 
= self._prev_comments 3579 query = self._parse_select() 3580 3581 if query: 3582 expressions = [query] 3583 else: 3584 expressions = self._parse_expressions() 3585 3586 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3587 3588 if isinstance(this, exp.Subqueryable): 3589 this = self._parse_set_operations( 3590 self._parse_subquery(this=this, parse_alias=False) 3591 ) 3592 elif len(expressions) > 1: 3593 this = self.expression(exp.Tuple, expressions=expressions) 3594 else: 3595 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3596 3597 if this: 3598 this.add_comments(comments) 3599 3600 self._match_r_paren(expression=this) 3601 return this 3602 3603 return None 3604 3605 def _parse_field( 3606 self, 3607 any_token: bool = False, 3608 tokens: t.Optional[t.Collection[TokenType]] = None, 3609 anonymous_func: bool = False, 3610 ) -> t.Optional[exp.Expression]: 3611 return ( 3612 self._parse_primary() 3613 or self._parse_function(anonymous=anonymous_func) 3614 or self._parse_id_var(any_token=any_token, tokens=tokens) 3615 ) 3616 3617 def _parse_function( 3618 self, 3619 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3620 anonymous: bool = False, 3621 optional_parens: bool = True, 3622 ) -> t.Optional[exp.Expression]: 3623 if not self._curr: 3624 return None 3625 3626 token_type = self._curr.token_type 3627 this = self._curr.text 3628 upper = this.upper() 3629 3630 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3631 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3632 self._advance() 3633 return parser(self) 3634 3635 if not self._next or self._next.token_type != TokenType.L_PAREN: 3636 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3637 self._advance() 3638 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3639 3640 return None 3641 3642 if token_type not in self.FUNC_TOKENS: 3643 return None 3644 3645 self._advance(2) 3646 3647 parser = 
self.FUNCTION_PARSERS.get(upper) 3648 if parser and not anonymous: 3649 this = parser(self) 3650 else: 3651 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3652 3653 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3654 this = self.expression(subquery_predicate, this=self._parse_select()) 3655 self._match_r_paren() 3656 return this 3657 3658 if functions is None: 3659 functions = self.FUNCTIONS 3660 3661 function = functions.get(upper) 3662 3663 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3664 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3665 3666 if function and not anonymous: 3667 func = self.validate_expression(function(args), args) 3668 if not self.NORMALIZE_FUNCTIONS: 3669 func.meta["name"] = this 3670 this = func 3671 else: 3672 this = self.expression(exp.Anonymous, this=this, expressions=args) 3673 3674 self._match_r_paren(this) 3675 return self._parse_window(this) 3676 3677 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3678 return self._parse_column_def(self._parse_id_var()) 3679 3680 def _parse_user_defined_function( 3681 self, kind: t.Optional[TokenType] = None 3682 ) -> t.Optional[exp.Expression]: 3683 this = self._parse_id_var() 3684 3685 while self._match(TokenType.DOT): 3686 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3687 3688 if not self._match(TokenType.L_PAREN): 3689 return this 3690 3691 expressions = self._parse_csv(self._parse_function_parameter) 3692 self._match_r_paren() 3693 return self.expression( 3694 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3695 ) 3696 3697 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3698 literal = self._parse_primary() 3699 if literal: 3700 return self.expression(exp.Introducer, this=token.text, expression=literal) 3701 3702 return self.expression(exp.Identifier, this=token.text) 3703 3704 def _parse_session_parameter(self) -> 
        # NOTE(review): continuation of _parse_session_parameter — its `def` line
        # precedes this chunk. Parses `[scope.]name` into exp.SessionParameter.
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            # A dotted name: the first part is the parameter scope/kind.
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda such as `(x, y) -> expr` or `x -> expr`.

        Falls back to a regular (possibly DISTINCT) expression, optionally
        followed by ORDER BY / LIMIT, when no lambda arrow is found.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            # No closing paren: this wasn't a parenthesized parameter list.
            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            # Dispatch on the lambda arrow token (e.g. `->`).
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as an ordinary expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list) attached to `this`.

        If the parenthesized content is actually a nested SELECT, returns `this`
        unchanged (the caller re-parses the subquery); errors from the
        speculative SELECT parse are always discarded and the cursor restored.
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Speculative parse: drop any errors it produced and rewind.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a single field definition (name plus optional type/constraints)."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type/constraint portion of a column definition for `this`.

        Returns `this` unchanged when neither a type nor any constraint follows,
        otherwise an exp.ColumnDef wrapping the name, kind and constraints.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        # A bare `name AS expr` (no type) is a computed column.
        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        # Consume constraints until none can be parsed.
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with `(start, increment)` or
        `START ... INCREMENT ...` arguments (which yield an identity constraint)."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        # Only a fully-specified start/increment pair upgrades to an identity
        # constraint; otherwise fall back to plain AUTO_INCREMENT.
        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, with either a wrapped list of
        arguments or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
    def _parse_generated_as_identity(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
        """Parse `GENERATED {ALWAYS | BY DEFAULT} AS [IDENTITY] (...)`.

        `this=True` marks ALWAYS, `this=False` marks BY DEFAULT. The optional
        parenthesized part carries identity options (start/increment/min/max/
        cycle) or, when IDENTITY is absent, a generation expression.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            # Without IDENTITY, the parens hold a generation expression instead.
            if not identity:
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] <expr> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL, CASESPECIFIC or FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            # Dispatch on the constraint keyword just consumed.
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed ones go through _parse_unnamed_constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint may bundle several constraint clauses.
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint that is not introduced by CONSTRAINT <name>."""
        # A quoted identifier can never start a constraint keyword.
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(columns)] [USING <index_type>]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON DELETE/UPDATE actions,
        DEFERRABLE, MATCH FULL, ...) as raw strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE / UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES <table> clause; `match=False` assumes the keyword
        was already consumed by the caller."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) REFERENCES ... [ON DELETE/UPDATE <action>]."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # CASCADE / RESTRICT / etc.: take the next token verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list."""
        return self._parse_field()

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, either as a column constraint (no paren list)
        or as a table-level key with a column list and options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` / `{...}` following `this`: subscript, slice, array
        literal or DuckDB struct literal. Recurses to allow chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading colon: open-ended slice `[:x]`.
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize indexes to the dialect's base offset.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice if a colon follows (e.g. `x:y`)."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, in both function form `IF(c, t, f)` and statement form
        `IF c THEN t [ELSE f] END`."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; NEXT was
        already consumed by the caller."""
        if not self._match_text_seq("VALUE", "FOR"):
            # Give back the NEXT token — this wasn't NEXT VALUE FOR.
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>) or EXTRACT(<part>, <expr>)."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        # raise_error may be a no-op under lenient error levels, so still
        # produce a best-effort node here.
        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING {MAX | MIN} col])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]).

        A `CAST(expr, 'type')` form becomes CastToStrType; a FORMAT clause on a
        temporal target type is rewritten into StrToDate / StrToTime.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat it as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse the argument list of CONCAT into a (Safe)Concat node."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT_WS(delim, value, ...) into a ConcatWs node."""
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Only the values are coerced; the delimiter is left untouched.
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style calls into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) as a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: NULL == NULL must also match, so
                # OR in an explicit both-NULL check.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of search/result args means the last one is the default.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one `[KEY] k {: | ,} [VALUE] v` pair for JSON constructors."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson when followed by FORMAT JSON."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) with its key/value pairs and options."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside a JSON_TABLE COLUMNS clause."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS (...) schema for JSON_TABLE."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(doc [, path] [error/empty handling] COLUMNS (...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG(...); single-argument calls may map to LN per dialect."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL MATCH (cols) AGAINST ('query' [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(doc [, path]) [WITH (col defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One `name type ['path'] [AS JSON]` column definition.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(needle IN haystack) or the comma-separated variant."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse ML.PREDICT(MODEL m, TABLE t [, params])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join-hint pseudo-function's table argument list."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM, the chars-to-trim come first: TRIM(chars FROM string).
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause (named window definitions), if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls / RespectNulls if the modifier follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-related suffixes of `this`: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...); `alias=True` parses a named
        window definition instead of an OVER clause."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (no parens): reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / expr, plus side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias for `this`; `explicit=True` requires the AS."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multiple aliases: `expr AS (a, b, ...)`.
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or an identifier-like token into an Identifier."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, or a placeholder standing in for one."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, or a placeholder standing in for one."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a (quoted) identifier token, or a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any/given token set) into an exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a var if possible, otherwise a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, or a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, or a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, or a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference such as `@name` or `${name}`."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token (e.g. `?`, `:name`); rewinds on failure."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The dedicated parser declined — give the token back.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `EXCEPT (cols)` / `EXCEPT col` of a SELECT * modifier."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `REPLACE (exprs)` / `REPLACE expr` of a SELECT * modifier."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, skipping Nones."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comments found at the separator to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) ->
t.Optional[exp.Expression]: 4803 this = parse_method() 4804 4805 while self._match_set(expressions): 4806 this = self.expression( 4807 expressions[self._prev.token_type], 4808 this=this, 4809 comments=self._prev_comments, 4810 expression=parse_method(), 4811 ) 4812 4813 return this 4814 4815 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4816 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4817 4818 def _parse_wrapped_csv( 4819 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4820 ) -> t.List[exp.Expression]: 4821 return self._parse_wrapped( 4822 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4823 ) 4824 4825 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4826 wrapped = self._match(TokenType.L_PAREN) 4827 if not wrapped and not optional: 4828 self.raise_error("Expecting (") 4829 parse_result = parse_method() 4830 if wrapped: 4831 self._match_r_paren() 4832 return parse_result 4833 4834 def _parse_expressions(self) -> t.List[exp.Expression]: 4835 return self._parse_csv(self._parse_expression) 4836 4837 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4838 return self._parse_select() or self._parse_set_operations( 4839 self._parse_expression() if alias else self._parse_conjunction() 4840 ) 4841 4842 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4843 return self._parse_query_modifiers( 4844 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4845 ) 4846 4847 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4848 this = None 4849 if self._match_texts(self.TRANSACTION_KIND): 4850 this = self._prev.text 4851 4852 self._match_texts({"TRANSACTION", "WORK"}) 4853 4854 modes = [] 4855 while True: 4856 mode = [] 4857 while self._match(TokenType.VAR): 4858 mode.append(self._prev.text) 4859 4860 if mode: 4861 modes.append(" 
".join(mode)) 4862 if not self._match(TokenType.COMMA): 4863 break 4864 4865 return self.expression(exp.Transaction, this=this, modes=modes) 4866 4867 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4868 chain = None 4869 savepoint = None 4870 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4871 4872 self._match_texts({"TRANSACTION", "WORK"}) 4873 4874 if self._match_text_seq("TO"): 4875 self._match_text_seq("SAVEPOINT") 4876 savepoint = self._parse_id_var() 4877 4878 if self._match(TokenType.AND): 4879 chain = not self._match_text_seq("NO") 4880 self._match_text_seq("CHAIN") 4881 4882 if is_rollback: 4883 return self.expression(exp.Rollback, savepoint=savepoint) 4884 4885 return self.expression(exp.Commit, chain=chain) 4886 4887 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4888 if not self._match_text_seq("ADD"): 4889 return None 4890 4891 self._match(TokenType.COLUMN) 4892 exists_column = self._parse_exists(not_=True) 4893 expression = self._parse_field_def() 4894 4895 if expression: 4896 expression.set("exists", exists_column) 4897 4898 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4899 if self._match_texts(("FIRST", "AFTER")): 4900 position = self._prev.text 4901 column_position = self.expression( 4902 exp.ColumnPosition, this=self._parse_column(), position=position 4903 ) 4904 expression.set("position", column_position) 4905 4906 return expression 4907 4908 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4909 drop = self._match(TokenType.DROP) and self._parse_drop() 4910 if drop and not isinstance(drop, exp.Command): 4911 drop.set("kind", drop.args.get("kind", "COLUMN")) 4912 return drop 4913 4914 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4915 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4916 return self.expression( 4917 exp.DropPartition, 
expressions=self._parse_csv(self._parse_partition), exists=exists 4918 ) 4919 4920 def _parse_add_constraint(self) -> exp.AddConstraint: 4921 this = None 4922 kind = self._prev.token_type 4923 4924 if kind == TokenType.CONSTRAINT: 4925 this = self._parse_id_var() 4926 4927 if self._match_text_seq("CHECK"): 4928 expression = self._parse_wrapped(self._parse_conjunction) 4929 enforced = self._match_text_seq("ENFORCED") 4930 4931 return self.expression( 4932 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4933 ) 4934 4935 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4936 expression = self._parse_foreign_key() 4937 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4938 expression = self._parse_primary_key() 4939 else: 4940 expression = None 4941 4942 return self.expression(exp.AddConstraint, this=this, expression=expression) 4943 4944 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4945 index = self._index - 1 4946 4947 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4948 return self._parse_csv(self._parse_add_constraint) 4949 4950 self._retreat(index) 4951 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4952 return self._parse_csv(self._parse_field_def) 4953 4954 return self._parse_csv(self._parse_add_column) 4955 4956 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4957 self._match(TokenType.COLUMN) 4958 column = self._parse_field(any_token=True) 4959 4960 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4961 return self.expression(exp.AlterColumn, this=column, drop=True) 4962 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4963 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4964 4965 self._match_text_seq("SET", "DATA") 4966 return self.expression( 4967 exp.AlterColumn, 4968 this=column, 4969 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4970 collate=self._match(TokenType.COLLATE) 
and self._parse_term(), 4971 using=self._match(TokenType.USING) and self._parse_conjunction(), 4972 ) 4973 4974 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4975 index = self._index - 1 4976 4977 partition_exists = self._parse_exists() 4978 if self._match(TokenType.PARTITION, advance=False): 4979 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4980 4981 self._retreat(index) 4982 return self._parse_csv(self._parse_drop_column) 4983 4984 def _parse_alter_table_rename(self) -> exp.RenameTable: 4985 self._match_text_seq("TO") 4986 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4987 4988 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4989 start = self._prev 4990 4991 if not self._match(TokenType.TABLE): 4992 return self._parse_as_command(start) 4993 4994 exists = self._parse_exists() 4995 only = self._match_text_seq("ONLY") 4996 this = self._parse_table(schema=True) 4997 4998 if self._next: 4999 self._advance() 5000 5001 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5002 if parser: 5003 actions = ensure_list(parser(self)) 5004 5005 if not self._curr: 5006 return self.expression( 5007 exp.AlterTable, 5008 this=this, 5009 exists=exists, 5010 actions=actions, 5011 only=only, 5012 ) 5013 5014 return self._parse_as_command(start) 5015 5016 def _parse_merge(self) -> exp.Merge: 5017 self._match(TokenType.INTO) 5018 target = self._parse_table() 5019 5020 if target and self._match(TokenType.ALIAS, advance=False): 5021 target.set("alias", self._parse_table_alias()) 5022 5023 self._match(TokenType.USING) 5024 using = self._parse_table() 5025 5026 self._match(TokenType.ON) 5027 on = self._parse_conjunction() 5028 5029 return self.expression( 5030 exp.Merge, 5031 this=target, 5032 using=using, 5033 on=on, 5034 expressions=self._parse_when_matched(), 5035 ) 5036 5037 def _parse_when_matched(self) -> t.List[exp.When]: 5038 whens = [] 5039 5040 while 
self._match(TokenType.WHEN): 5041 matched = not self._match(TokenType.NOT) 5042 self._match_text_seq("MATCHED") 5043 source = ( 5044 False 5045 if self._match_text_seq("BY", "TARGET") 5046 else self._match_text_seq("BY", "SOURCE") 5047 ) 5048 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5049 5050 self._match(TokenType.THEN) 5051 5052 if self._match(TokenType.INSERT): 5053 _this = self._parse_star() 5054 if _this: 5055 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5056 else: 5057 then = self.expression( 5058 exp.Insert, 5059 this=self._parse_value(), 5060 expression=self._match(TokenType.VALUES) and self._parse_value(), 5061 ) 5062 elif self._match(TokenType.UPDATE): 5063 expressions = self._parse_star() 5064 if expressions: 5065 then = self.expression(exp.Update, expressions=expressions) 5066 else: 5067 then = self.expression( 5068 exp.Update, 5069 expressions=self._match(TokenType.SET) 5070 and self._parse_csv(self._parse_equality), 5071 ) 5072 elif self._match(TokenType.DELETE): 5073 then = self.expression(exp.Var, this=self._prev.text) 5074 else: 5075 then = None 5076 5077 whens.append( 5078 self.expression( 5079 exp.When, 5080 matched=matched, 5081 source=source, 5082 condition=condition, 5083 then=then, 5084 ) 5085 ) 5086 return whens 5087 5088 def _parse_show(self) -> t.Optional[exp.Expression]: 5089 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5090 if parser: 5091 return parser(self) 5092 return self._parse_as_command(self._prev) 5093 5094 def _parse_set_item_assignment( 5095 self, kind: t.Optional[str] = None 5096 ) -> t.Optional[exp.Expression]: 5097 index = self._index 5098 5099 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 5100 return self._parse_set_transaction(global_=kind == "GLOBAL") 5101 5102 left = self._parse_primary() or self._parse_id_var() 5103 assignment_delimiter = self._match_texts(("=", "TO")) 5104 5105 if not left or 
(self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5106 self._retreat(index) 5107 return None 5108 5109 right = self._parse_statement() or self._parse_id_var() 5110 this = self.expression(exp.EQ, this=left, expression=right) 5111 5112 return self.expression(exp.SetItem, this=this, kind=kind) 5113 5114 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5115 self._match_text_seq("TRANSACTION") 5116 characteristics = self._parse_csv( 5117 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5118 ) 5119 return self.expression( 5120 exp.SetItem, 5121 expressions=characteristics, 5122 kind="TRANSACTION", 5123 **{"global": global_}, # type: ignore 5124 ) 5125 5126 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5127 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5128 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5129 5130 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5131 index = self._index 5132 set_ = self.expression( 5133 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5134 ) 5135 5136 if self._curr: 5137 self._retreat(index) 5138 return self._parse_as_command(self._prev) 5139 5140 return set_ 5141 5142 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5143 for option in options: 5144 if self._match_text_seq(*option.split(" ")): 5145 return exp.var(option) 5146 return None 5147 5148 def _parse_as_command(self, start: Token) -> exp.Command: 5149 while self._curr: 5150 self._advance() 5151 text = self._find_sql(start, self._prev) 5152 size = len(start.text) 5153 return exp.Command(this=text[:size], expression=text[size:]) 5154 5155 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5156 settings = [] 5157 5158 self._match_l_paren() 5159 kind = self._parse_id_var() 5160 5161 if self._match(TokenType.L_PAREN): 5162 while True: 5163 key = 
self._parse_id_var() 5164 value = self._parse_primary() 5165 5166 if not key and value is None: 5167 break 5168 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5169 self._match(TokenType.R_PAREN) 5170 5171 self._match_r_paren() 5172 5173 return self.expression( 5174 exp.DictProperty, 5175 this=this, 5176 kind=kind.this if kind else None, 5177 settings=settings, 5178 ) 5179 5180 def _parse_dict_range(self, this: str) -> exp.DictRange: 5181 self._match_l_paren() 5182 has_min = self._match_text_seq("MIN") 5183 if has_min: 5184 min = self._parse_var() or self._parse_primary() 5185 self._match_text_seq("MAX") 5186 max = self._parse_var() or self._parse_primary() 5187 else: 5188 max = self._parse_var() or self._parse_primary() 5189 min = exp.Literal.number(0) 5190 self._match_r_paren() 5191 return self.expression(exp.DictRange, this=this, min=min, max=max) 5192 5193 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5194 index = self._index 5195 expression = self._parse_column() 5196 if not self._match(TokenType.IN): 5197 self._retreat(index - 1) 5198 return None 5199 iterator = self._parse_column() 5200 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5201 return self.expression( 5202 exp.Comprehension, 5203 this=this, 5204 expression=expression, 5205 iterator=iterator, 5206 condition=condition, 5207 ) 5208 5209 def _find_parser( 5210 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5211 ) -> t.Optional[t.Callable]: 5212 if not self._curr: 5213 return None 5214 5215 index = self._index 5216 this = [] 5217 while True: 5218 # The current token might be multiple words 5219 curr = self._curr.text.upper() 5220 key = curr.split(" ") 5221 this.append(curr) 5222 5223 self._advance() 5224 result, trie = in_trie(trie, key) 5225 if result == TrieResult.FAILED: 5226 break 5227 5228 if result == TrieResult.EXISTS: 5229 subparser = parsers[" ".join(this)] 5230 return subparser 5231 5232 
self._retreat(index) 5233 return None 5234 5235 def _match(self, token_type, advance=True, expression=None): 5236 if not self._curr: 5237 return None 5238 5239 if self._curr.token_type == token_type: 5240 if advance: 5241 self._advance() 5242 self._add_comments(expression) 5243 return True 5244 5245 return None 5246 5247 def _match_set(self, types, advance=True): 5248 if not self._curr: 5249 return None 5250 5251 if self._curr.token_type in types: 5252 if advance: 5253 self._advance() 5254 return True 5255 5256 return None 5257 5258 def _match_pair(self, token_type_a, token_type_b, advance=True): 5259 if not self._curr or not self._next: 5260 return None 5261 5262 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5263 if advance: 5264 self._advance(2) 5265 return True 5266 5267 return None 5268 5269 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5270 if not self._match(TokenType.L_PAREN, expression=expression): 5271 self.raise_error("Expecting (") 5272 5273 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5274 if not self._match(TokenType.R_PAREN, expression=expression): 5275 self.raise_error("Expecting )") 5276 5277 def _match_texts(self, texts, advance=True): 5278 if self._curr and self._curr.text.upper() in texts: 5279 if advance: 5280 self._advance() 5281 return True 5282 return False 5283 5284 def _match_text_seq(self, *texts, advance=True): 5285 index = self._index 5286 for text in texts: 5287 if self._curr and self._curr.text.upper() == text: 5288 self._advance() 5289 else: 5290 self._retreat(index) 5291 return False 5292 5293 if not advance: 5294 self._retreat(index) 5295 5296 return True 5297 5298 @t.overload 5299 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5300 ... 5301 5302 @t.overload 5303 def _replace_columns_with_dots( 5304 self, this: t.Optional[exp.Expression] 5305 ) -> t.Optional[exp.Expression]: 5306 ... 
5307 5308 def _replace_columns_with_dots(self, this): 5309 if isinstance(this, exp.Dot): 5310 exp.replace_children(this, self._replace_columns_with_dots) 5311 elif isinstance(this, exp.Column): 5312 exp.replace_children(this, self._replace_columns_with_dots) 5313 table = this.args.get("table") 5314 this = ( 5315 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5316 ) 5317 5318 return this 5319 5320 def _replace_lambda( 5321 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5322 ) -> t.Optional[exp.Expression]: 5323 if not node: 5324 return node 5325 5326 for column in node.find_all(exp.Column): 5327 if column.parts[0].name in lambda_variables: 5328 dot_or_id = column.to_dot() if column.table else column.this 5329 parent = column.parent 5330 5331 while isinstance(parent, exp.Dot): 5332 if not isinstance(parent.parent, exp.Dot): 5333 parent.replace(dot_or_id) 5334 break 5335 parent = parent.parent 5336 else: 5337 if column is node: 5338 node = dot_or_id 5339 else: 5340 column.replace(dot_or_id) 5341 return node 5342 5343 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5344 return [ 5345 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5346 for value in values 5347 if value 5348 ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Args:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        """
        Initialize the parser and its tokenizer.

        Args:
            error_level: The desired error level; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of query context to show in
                error messages.
            max_errors: Maximum number of error messages included in a raised
                ParseError (only relevant when error_level is ErrorLevel.RAISE).
        """
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        # TOKENIZER_CLASS is read off the instance/class so subclasses can swap in
        # their own tokenizer.  NOTE(review): attribute defined outside this chunk — confirm.
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        # The method is looked up on the class (not bound to self) so that _parse
        # receives a plain callable it can apply to this parser instance.
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Args:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
987 def parse_into( 988 self, 989 expression_types: exp.IntoType, 990 raw_tokens: t.List[Token], 991 sql: t.Optional[str] = None, 992 ) -> t.List[t.Optional[exp.Expression]]: 993 """ 994 Parses a list of tokens into a given Expression type. If a collection of Expression 995 types is given instead, this method will try to parse the token list into each one 996 of them, stopping at the first for which the parsing succeeds. 997 998 Args: 999 expression_types: The expression type(s) to try and parse the token list into. 1000 raw_tokens: The list of tokens. 1001 sql: The original SQL string, used to produce helpful debug messages. 1002 1003 Returns: 1004 The target Expression. 1005 """ 1006 errors = [] 1007 for expression_type in ensure_list(expression_types): 1008 parser = self.EXPRESSION_PARSERS.get(expression_type) 1009 if not parser: 1010 raise TypeError(f"No parser registered for {expression_type}") 1011 1012 try: 1013 return self._parse(parser, raw_tokens, sql) 1014 except ParseError as e: 1015 e.errors[0]["into_expression"] = expression_type 1016 errors.append(e) 1017 1018 raise ParseError( 1019 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1020 errors=merge_errors(errors), 1021 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Args:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1058 def check_errors(self) -> None: 1059 """Logs or raises any found errors, depending on the chosen error level setting.""" 1060 if self.error_level == ErrorLevel.WARN: 1061 for error in self.errors: 1062 logger.error(str(error)) 1063 elif self.error_level == ErrorLevel.RAISE and self.errors: 1064 raise ParseError( 1065 concat_messages(self.errors, self.max_errors), 1066 errors=merge_errors(self.errors), 1067 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: The description of the error.
            token: The token the error is anchored to; falls back to the current token,
                then the previous one, then an empty token.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending span is underlined using ANSI escape codes (\033[4m ... \033[0m).
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1097 def expression( 1098 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1099 ) -> E: 1100 """ 1101 Creates a new, validated Expression. 1102 1103 Args: 1104 exp_class: The expression class to instantiate. 1105 comments: An optional list of comments to attach to the expression. 1106 kwargs: The arguments to set for the expression along with their respective values. 1107 1108 Returns: 1109 The target expression. 1110 """ 1111 instance = exp_class(**kwargs) 1112 instance.add_comments(comments) if comments else self._add_comments(instance) 1113 return self.validate_expression(instance)
Creates a new, validated Expression.
Args:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1120 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1121 """ 1122 Validates an Expression, making sure that all its mandatory arguments are set. 1123 1124 Args: 1125 expression: The expression to validate. 1126 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1127 1128 Returns: 1129 The validated expression. 1130 """ 1131 if self.error_level != ErrorLevel.IGNORE: 1132 for error_message in expression.error_messages(args): 1133 self.raise_error(error_message) 1134 1135 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Args:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.