sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.MEDIUMINT, 140 TokenType.UMEDIUMINT, 141 TokenType.FIXEDSTRING, 142 TokenType.FLOAT, 143 TokenType.DOUBLE, 144 TokenType.CHAR, 145 TokenType.NCHAR, 146 TokenType.VARCHAR, 147 TokenType.NVARCHAR, 148 TokenType.TEXT, 149 TokenType.MEDIUMTEXT, 150 TokenType.LONGTEXT, 151 TokenType.MEDIUMBLOB, 152 TokenType.LONGBLOB, 153 TokenType.BINARY, 154 TokenType.VARBINARY, 155 TokenType.JSON, 156 TokenType.JSONB, 157 TokenType.INTERVAL, 158 TokenType.TINYBLOB, 159 TokenType.TINYTEXT, 160 TokenType.TIME, 161 TokenType.TIMETZ, 162 TokenType.TIMESTAMP, 163 TokenType.TIMESTAMPTZ, 164 TokenType.TIMESTAMPLTZ, 165 TokenType.DATETIME, 166 TokenType.DATETIME64, 167 TokenType.DATE, 168 TokenType.INT4RANGE, 169 TokenType.INT4MULTIRANGE, 170 TokenType.INT8RANGE, 171 TokenType.INT8MULTIRANGE, 172 TokenType.NUMRANGE, 173 TokenType.NUMMULTIRANGE, 174 TokenType.TSRANGE, 175 TokenType.TSMULTIRANGE, 176 TokenType.TSTZRANGE, 177 TokenType.TSTZMULTIRANGE, 178 TokenType.DATERANGE, 179 TokenType.DATEMULTIRANGE, 180 TokenType.DECIMAL, 181 TokenType.BIGDECIMAL, 182 TokenType.UUID, 183 TokenType.GEOGRAPHY, 184 TokenType.GEOMETRY, 185 TokenType.HLLSKETCH, 186 TokenType.HSTORE, 187 TokenType.PSEUDO_TYPE, 188 TokenType.SUPER, 189 TokenType.SERIAL, 190 TokenType.SMALLSERIAL, 191 TokenType.BIGSERIAL, 192 TokenType.XML, 193 TokenType.YEAR, 194 TokenType.UNIQUEIDENTIFIER, 195 TokenType.USERDEFINED, 196 TokenType.MONEY, 197 TokenType.SMALLMONEY, 198 TokenType.ROWVERSION, 199 TokenType.IMAGE, 200 TokenType.VARIANT, 201 TokenType.OBJECT, 202 TokenType.OBJECT_IDENTIFIER, 203 TokenType.INET, 204 TokenType.IPADDRESS, 205 TokenType.IPPREFIX, 206 TokenType.UNKNOWN, 207 TokenType.NULL, 208 *ENUM_TYPE_TOKENS, 209 *NESTED_TYPE_TOKENS, 210 } 211 212 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 213 TokenType.BIGINT: TokenType.UBIGINT, 214 TokenType.INT: TokenType.UINT, 215 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 216 TokenType.SMALLINT: TokenType.USMALLINT, 217 TokenType.TINYINT: TokenType.UTINYINT, 218 } 219 220 SUBQUERY_PREDICATES = { 221 TokenType.ANY: exp.Any, 222 TokenType.ALL: exp.All, 223 TokenType.EXISTS: exp.Exists, 224 TokenType.SOME: exp.Any, 225 } 226 227 RESERVED_KEYWORDS = { 228 *Tokenizer.SINGLE_TOKENS.values(), 229 TokenType.SELECT, 230 } 231 232 DB_CREATABLES = { 233 TokenType.DATABASE, 234 TokenType.SCHEMA, 235 TokenType.TABLE, 236 TokenType.VIEW, 237 TokenType.DICTIONARY, 238 } 239 240 CREATABLES = { 241 TokenType.COLUMN, 242 TokenType.FUNCTION, 243 TokenType.INDEX, 244 TokenType.PROCEDURE, 245 *DB_CREATABLES, 246 } 247 248 # Tokens that can represent identifiers 249 ID_VAR_TOKENS = { 250 TokenType.VAR, 251 TokenType.ANTI, 252 TokenType.APPLY, 253 TokenType.ASC, 254 TokenType.AUTO_INCREMENT, 255 TokenType.BEGIN, 256 TokenType.CACHE, 257 TokenType.CASE, 258 TokenType.COLLATE, 259 TokenType.COMMAND, 260 TokenType.COMMENT, 261 TokenType.COMMIT, 262 TokenType.CONSTRAINT, 263 TokenType.DEFAULT, 264 TokenType.DELETE, 265 TokenType.DESC, 266 TokenType.DESCRIBE, 267 TokenType.DICTIONARY, 268 TokenType.DIV, 269 TokenType.END, 270 TokenType.EXECUTE, 271 TokenType.ESCAPE, 272 TokenType.FALSE, 273 TokenType.FIRST, 274 TokenType.FILTER, 275 TokenType.FORMAT, 276 TokenType.FULL, 277 TokenType.IS, 278 TokenType.ISNULL, 279 TokenType.INTERVAL, 280 TokenType.KEEP, 281 TokenType.KILL, 282 TokenType.LEFT, 283 TokenType.LOAD, 284 TokenType.MERGE, 285 TokenType.NATURAL, 286 TokenType.NEXT, 287 TokenType.OFFSET, 288 TokenType.ORDINALITY, 289 TokenType.OVERLAPS, 290 TokenType.OVERWRITE, 291 TokenType.PARTITION, 292 TokenType.PERCENT, 293 TokenType.PIVOT, 294 TokenType.PRAGMA, 295 TokenType.RANGE, 296 TokenType.REFERENCES, 297 TokenType.RIGHT, 298 TokenType.ROW, 299 TokenType.ROWS, 300 TokenType.SEMI, 301 TokenType.SET, 302 TokenType.SETTINGS, 303 TokenType.SHOW, 304 TokenType.TEMPORARY, 305 TokenType.TOP, 306 TokenType.TRUE, 307 TokenType.UNIQUE, 308 TokenType.UNPIVOT, 309 TokenType.UPDATE, 310 TokenType.VOLATILE, 311 TokenType.WINDOW, 312 *CREATABLES, 313 *SUBQUERY_PREDICATES, 314 *TYPE_TOKENS, 315 *NO_PAREN_FUNCTIONS, 316 } 317 318 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 319 320 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 321 TokenType.ANTI, 322 TokenType.APPLY, 323 TokenType.ASOF, 324 TokenType.FULL, 325 TokenType.LEFT, 326 TokenType.LOCK, 327 TokenType.NATURAL, 328 TokenType.OFFSET, 329 TokenType.RIGHT, 330 TokenType.SEMI, 331 TokenType.WINDOW, 332 } 333 334 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 335 336 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 337 338 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 339 340 FUNC_TOKENS = { 341 TokenType.COMMAND, 342 TokenType.CURRENT_DATE, 343 TokenType.CURRENT_DATETIME, 344 TokenType.CURRENT_TIMESTAMP, 345 TokenType.CURRENT_TIME, 346 TokenType.CURRENT_USER, 347 TokenType.FILTER, 348 TokenType.FIRST, 349 TokenType.FORMAT, 350 TokenType.GLOB, 351 TokenType.IDENTIFIER, 352 TokenType.INDEX, 353 TokenType.ISNULL, 354 TokenType.ILIKE, 355 TokenType.INSERT, 356 TokenType.LIKE, 357 TokenType.MERGE, 358 TokenType.OFFSET, 359 TokenType.PRIMARY_KEY, 360 TokenType.RANGE, 361 TokenType.REPLACE, 362 TokenType.RLIKE, 363 TokenType.ROW, 364 TokenType.UNNEST, 365 TokenType.VAR, 366 TokenType.LEFT, 367 TokenType.RIGHT, 368 TokenType.DATE, 369 TokenType.DATETIME, 370 TokenType.TABLE, 371 TokenType.TIMESTAMP, 372 TokenType.TIMESTAMPTZ, 373 TokenType.WINDOW, 374 TokenType.XOR, 375 *TYPE_TOKENS, 376 *SUBQUERY_PREDICATES, 377 } 378 379 CONJUNCTION = { 380 TokenType.AND: exp.And, 381 TokenType.OR: exp.Or, 382 } 383 384 EQUALITY = { 385 TokenType.EQ: exp.EQ, 386 TokenType.NEQ: exp.NEQ, 387 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 388 } 389 390 COMPARISON = { 391 TokenType.GT: exp.GT, 392 TokenType.GTE: exp.GTE, 393 TokenType.LT: exp.LT, 394 TokenType.LTE: exp.LTE, 395 } 396 397 BITWISE = { 398 TokenType.AMP: exp.BitwiseAnd, 399 TokenType.CARET: exp.BitwiseXor, 400 TokenType.PIPE: exp.BitwiseOr, 401 TokenType.DPIPE: exp.DPipe, 402 } 403 404 TERM = { 405 TokenType.DASH: exp.Sub, 406 TokenType.PLUS: exp.Add, 407 TokenType.MOD: exp.Mod, 408 TokenType.COLLATE: exp.Collate, 409 } 410 411 FACTOR = { 412 TokenType.DIV: exp.IntDiv, 413 TokenType.LR_ARROW: exp.Distance, 414 TokenType.SLASH: exp.Div, 415 TokenType.STAR: exp.Mul, 416 } 417 418 TIMES = { 419 TokenType.TIME, 420 TokenType.TIMETZ, 421 } 422 423 TIMESTAMPS = { 424 TokenType.TIMESTAMP, 425 TokenType.TIMESTAMPTZ, 426 TokenType.TIMESTAMPLTZ, 427 *TIMES, 428 } 429 430 SET_OPERATIONS = { 431 TokenType.UNION, 432 TokenType.INTERSECT, 433 TokenType.EXCEPT, 434 } 435 436 JOIN_METHODS = { 437 TokenType.NATURAL, 438 TokenType.ASOF, 439 } 440 441 JOIN_SIDES = { 442 TokenType.LEFT, 443 TokenType.RIGHT, 444 TokenType.FULL, 445 } 446 447 JOIN_KINDS = { 448 TokenType.INNER, 449 TokenType.OUTER, 450 TokenType.CROSS, 451 TokenType.SEMI, 452 TokenType.ANTI, 453 } 454 455 JOIN_HINTS: t.Set[str] = set() 456 457 LAMBDAS = { 458 TokenType.ARROW: lambda self, expressions: self.expression( 459 exp.Lambda, 460 this=self._replace_lambda( 461 self._parse_conjunction(), 462 {node.name for node in expressions}, 463 ), 464 expressions=expressions, 465 ), 466 TokenType.FARROW: lambda self, expressions: self.expression( 467 exp.Kwarg, 468 this=exp.var(expressions[0].name), 469 expression=self._parse_conjunction(), 470 ), 471 } 472 473 COLUMN_OPERATORS = { 474 TokenType.DOT: None, 475 TokenType.DCOLON: lambda self, this, to: self.expression( 476 exp.Cast if self.STRICT_CAST else exp.TryCast, 477 this=this, 478 to=to, 479 ), 480 TokenType.ARROW: lambda self, this, path: self.expression( 481 exp.JSONExtract, 482 this=this, 483 expression=path, 484 ), 485 TokenType.DARROW: lambda self, this, path: self.expression( 486 exp.JSONExtractScalar, 487 this=this, 488 expression=path, 489 ), 490 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 491 exp.JSONBExtract, 492 this=this, 493 expression=path, 494 ), 495 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 496 exp.JSONBExtractScalar, 497 this=this, 498 expression=path, 499 ), 500 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 501 exp.JSONBContains, 502 this=this, 503 expression=key, 504 ), 505 } 506 507 EXPRESSION_PARSERS = { 508 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 509 exp.Column: lambda self: self._parse_column(), 510 exp.Condition: lambda self: self._parse_conjunction(), 511 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 512 exp.Expression: lambda self: self._parse_statement(), 513 exp.From: lambda self: self._parse_from(), 514 exp.Group: lambda self: self._parse_group(), 515 exp.Having: lambda self: self._parse_having(), 516 exp.Identifier: lambda self: self._parse_id_var(), 517 exp.Join: lambda self: self._parse_join(), 518 exp.Lambda: lambda self: self._parse_lambda(), 519 exp.Lateral: lambda self: self._parse_lateral(), 520 exp.Limit: lambda self: self._parse_limit(), 521 exp.Offset: lambda self: self._parse_offset(), 522 exp.Order: lambda self: self._parse_order(), 523 exp.Ordered: lambda self: self._parse_ordered(), 524 exp.Properties: lambda self: self._parse_properties(), 525 exp.Qualify: lambda self: self._parse_qualify(), 526 exp.Returning: lambda self: self._parse_returning(), 527 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 528 exp.Table: lambda self: self._parse_table_parts(), 529 exp.TableAlias: lambda self: self._parse_table_alias(), 530 exp.Where: lambda self: self._parse_where(), 531 exp.Window: lambda self: self._parse_named_window(), 532 exp.With: lambda self: self._parse_with(), 533 "JOIN_TYPE": lambda self: self._parse_join_parts(), 534 } 535 536 STATEMENT_PARSERS = { 537 TokenType.ALTER: lambda self: self._parse_alter(), 538 TokenType.BEGIN: lambda self: self._parse_transaction(), 539 TokenType.CACHE: lambda self: self._parse_cache(), 540 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 541 TokenType.COMMENT: lambda self: self._parse_comment(), 542 TokenType.CREATE: lambda self: self._parse_create(), 543 TokenType.DELETE: lambda self: self._parse_delete(), 544 TokenType.DESC: lambda self: self._parse_describe(), 545 TokenType.DESCRIBE: lambda self: self._parse_describe(), 546 TokenType.DROP: lambda self: self._parse_drop(), 547 TokenType.INSERT: lambda self: self._parse_insert(), 548 TokenType.KILL: lambda self: self._parse_kill(), 549 TokenType.LOAD: lambda self: self._parse_load(), 550 TokenType.MERGE: lambda self: self._parse_merge(), 551 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 552 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 553 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 554 TokenType.SET: lambda self: self._parse_set(), 555 TokenType.UNCACHE: lambda self: self._parse_uncache(), 556 TokenType.UPDATE: lambda self: self._parse_update(), 557 TokenType.USE: lambda self: self.expression( 558 exp.Use, 559 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 560 and exp.var(self._prev.text), 561 this=self._parse_table(schema=False), 562 ), 563 } 564 565 UNARY_PARSERS = { 566 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 567 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 568 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 569 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 570 } 571 572 PRIMARY_PARSERS = { 573 TokenType.STRING: lambda self, token: self.expression( 574 exp.Literal, this=token.text, is_string=True 575 ), 576 TokenType.NUMBER: lambda self, token: self.expression( 577 exp.Literal, this=token.text, is_string=False 578 ), 579 TokenType.STAR: lambda self, _: self.expression( 580 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 581 ), 582 TokenType.NULL: lambda self, _: self.expression(exp.Null), 583 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 584 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 585 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 586 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 587 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 588 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 589 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 590 exp.National, this=token.text 591 ), 592 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 593 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 594 } 595 596 PLACEHOLDER_PARSERS = { 597 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 598 TokenType.PARAMETER: lambda self: self._parse_parameter(), 599 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 600 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 601 else None, 602 } 603 604 RANGE_PARSERS = { 605 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 606 TokenType.GLOB: binary_range_parser(exp.Glob), 607 TokenType.ILIKE: binary_range_parser(exp.ILike), 608 TokenType.IN: lambda self, this: self._parse_in(this), 609 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 610 TokenType.IS: lambda self, this: self._parse_is(this), 611 TokenType.LIKE: binary_range_parser(exp.Like), 612 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 613 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 614 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 615 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 616 } 617 618 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 619 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 620 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 621 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 622 "CHARACTER SET": lambda self: self._parse_character_set(), 623 "CHECKSUM": lambda self: self._parse_checksum(), 624 "CLUSTER BY": lambda self: self._parse_cluster(), 625 "CLUSTERED": lambda self: self._parse_clustered_by(), 626 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 627 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 628 "COPY": lambda self: self._parse_copy_property(), 629 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 630 "DEFINER": lambda self: self._parse_definer(), 631 "DETERMINISTIC": lambda self: self.expression( 632 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 633 ), 634 "DISTKEY": lambda self: self._parse_distkey(), 635 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 636 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 637 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 638 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 639 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 640 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 641 "FREESPACE": lambda self: self._parse_freespace(), 642 "HEAP": lambda self: self.expression(exp.HeapProperty), 643 "IMMUTABLE": lambda self: self.expression( 644 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 645 ), 646 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 647 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 648 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 649 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 650 "LIKE": lambda self: self._parse_create_like(), 651 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 652 "LOCK": lambda self: self._parse_locking(), 653 "LOCKING": lambda self: self._parse_locking(), 654 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 655 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 656 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 657 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 658 "NO": lambda self: self._parse_no_property(), 659 "ON": lambda self: self._parse_on_property(), 660 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 661 "PARTITION BY": lambda self: self._parse_partitioned_by(), 662 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 663 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 664 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 665 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 666 "RETURNS": lambda self: self._parse_returns(), 667 "ROW": lambda self: self._parse_row(), 668 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 669 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 670 "SETTINGS": lambda self: self.expression( 671 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 672 ), 673 "SORTKEY": lambda self: self._parse_sortkey(), 674 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 675 "STABLE": lambda self: self.expression( 676 exp.StabilityProperty, this=exp.Literal.string("STABLE") 677 ), 678 "STORED": lambda self: self._parse_stored(), 679 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 680 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 681 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 682 "TO": lambda self: self._parse_to_table(), 683 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 684 "TTL": lambda self: self._parse_ttl(), 685 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 686 "VOLATILE": lambda self: self._parse_volatile_property(), 687 "WITH": lambda self: self._parse_with_property(), 688 } 689 690 CONSTRAINT_PARSERS = { 691 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 692 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 693 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 694 "CHARACTER SET": lambda self: self.expression( 695 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 696 ), 697 "CHECK": lambda self: self.expression( 698 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 699 ), 700 "COLLATE": lambda self: self.expression( 701 exp.CollateColumnConstraint, this=self._parse_var() 702 ), 703 "COMMENT": lambda self: self.expression( 704 exp.CommentColumnConstraint, this=self._parse_string() 705 ), 706 "COMPRESS": lambda self: self._parse_compress(), 707 "CLUSTERED": lambda self: self.expression( 708 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 709 ), 710 "NONCLUSTERED": lambda self: self.expression( 711 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 712 ), 713 "DEFAULT": lambda self: self.expression( 714 exp.DefaultColumnConstraint, this=self._parse_bitwise() 715 ), 716 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 717 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 718 "FORMAT": lambda self: self.expression( 719 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 720 ), 721 "GENERATED": lambda self: self._parse_generated_as_identity(), 722 "IDENTITY": lambda self: self._parse_auto_increment(), 723 "INLINE": lambda self: self._parse_inline(), 724 "LIKE": lambda self: self._parse_create_like(), 725 "NOT": lambda self: self._parse_not_constraint(), 726 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 727 "ON": lambda self: ( 728 self._match(TokenType.UPDATE) 729 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 730 ) 731 or self.expression(exp.OnProperty, this=self._parse_id_var()), 732 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 733 "PRIMARY KEY": lambda self: self._parse_primary_key(), 734 "REFERENCES": lambda self: self._parse_references(match=False), 735 "TITLE": lambda self: self.expression( 736 exp.TitleColumnConstraint, this=self._parse_var_or_string() 737 ), 738 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 739 "UNIQUE": lambda self: self._parse_unique(), 740 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 741 "WITH": lambda self: self.expression( 742 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 743 ), 744 } 745 746 ALTER_PARSERS = { 747 "ADD": lambda self: self._parse_alter_table_add(), 748 "ALTER": lambda self: self._parse_alter_table_alter(), 749 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 750 "DROP": lambda self: self._parse_alter_table_drop(), 751 "RENAME": lambda self: self._parse_alter_table_rename(), 752 } 753 754 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 755 756 NO_PAREN_FUNCTION_PARSERS = { 757 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 758 "CASE": lambda self: self._parse_case(), 759 "IF": lambda self: self._parse_if(), 760 "NEXT": lambda self: self._parse_next_value_for(), 761 } 762 763 INVALID_FUNC_NAME_TOKENS = { 764 TokenType.IDENTIFIER, 765 TokenType.STRING, 766 } 767 768 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 769 770 FUNCTION_PARSERS = { 771 "ANY_VALUE": lambda self: self._parse_any_value(), 772 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 773 "CONCAT": lambda self: self._parse_concat(), 774 "CONCAT_WS": lambda self: self._parse_concat_ws(), 775 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 776 "DECODE": lambda self: self._parse_decode(), 777 "EXTRACT": lambda self: self._parse_extract(), 778 "JSON_OBJECT": lambda self: self._parse_json_object(), 779 "LOG": lambda self: self._parse_logarithm(), 780 "MATCH": lambda self: self._parse_match_against(), 781 "OPENJSON": lambda self: self._parse_open_json(), 782 "POSITION": lambda self: self._parse_position(), 783 "SAFE_CAST": lambda self: self._parse_cast(False), 784 "STRING_AGG": lambda self: self._parse_string_agg(), 785 "SUBSTRING": lambda self: self._parse_substring(), 786 "TRIM": lambda self: self._parse_trim(), 787 "TRY_CAST": lambda self: self._parse_cast(False), 788 "TRY_CONVERT": lambda self: self._parse_convert(False), 789 } 790 791 QUERY_MODIFIER_PARSERS = { 792 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 793 TokenType.WHERE: lambda self: ("where", self._parse_where()), 794 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 795 TokenType.HAVING: lambda self: ("having", self._parse_having()), 796 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 797 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 798 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 799 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 800 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 801 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 802 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 803 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 804 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 805 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 806 TokenType.CLUSTER_BY: lambda self: ( 807 "cluster", 808 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 809 ), 810 TokenType.DISTRIBUTE_BY: lambda self: ( 811 "distribute", 812 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 813 ), 814 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 815 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 816 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 817 } 818 819 SET_PARSERS = { 820 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 821 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 822 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 823 "TRANSACTION": lambda self: self._parse_set_transaction(), 824 } 825 826 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 827 828 TYPE_LITERAL_PARSERS = { 829 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 830 } 831 832 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 833 834 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 835 836 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 837 838 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 839 TRANSACTION_CHARACTERISTICS = { 840 "ISOLATION LEVEL REPEATABLE READ", 841 "ISOLATION LEVEL READ COMMITTED", 842 "ISOLATION LEVEL READ UNCOMMITTED", 843 "ISOLATION LEVEL SERIALIZABLE", 844 "READ WRITE", 845 "READ ONLY", 846 } 847 848 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 849 850 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 851 852 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 853 854 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 855 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 856 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 857 858 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 859 860 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 861 862 DISTINCT_TOKENS = {TokenType.DISTINCT} 863 864 NULL_TOKENS = {TokenType.NULL} 865 866 STRICT_CAST = True 867 868 # A NULL arg in CONCAT yields NULL by default 869 CONCAT_NULL_OUTPUTS_STRING = False 870 871 PREFIXED_PIVOT_COLUMNS = False 872 IDENTIFY_PIVOT_STRINGS = False 873 874 LOG_BASE_FIRST = True 875 LOG_DEFAULTS_TO_LN = False 876 877 # Whether or not ADD is present for each column added by ALTER TABLE 878 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 879 880 # Whether or not the table sample clause expects CSV syntax 881 TABLESAMPLE_CSV = False 882 883 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments. 884 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 885 886 __slots__ = ( 887 "error_level", 888 "error_message_context", 889 "max_errors", 890 "sql", 891 "errors", 892 "_tokens", 893 "_index", 894 "_curr", 895 "_next", 896 "_prev", 897 "_prev_comments", 898 "_tokenizer", 899 ) 900 901 # Autofilled 902 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 903 INDEX_OFFSET: int = 0 904 UNNEST_COLUMN_ONLY: bool = False 905 ALIAS_POST_TABLESAMPLE: bool = False 906 STRICT_STRING_CONCAT = False 907 SUPPORTS_USER_DEFINED_TYPES = True 908 NORMALIZE_FUNCTIONS = "upper" 909 NULL_ORDERING: str = "nulls_are_small" 910 SHOW_TRIE: t.Dict = {} 911 SET_TRIE: t.Dict = {} 912 FORMAT_MAPPING: t.Dict[str, str] = {} 913 FORMAT_TRIE: t.Dict = {} 914 TIME_MAPPING: t.Dict[str, str] = {} 915 TIME_TRIE: t.Dict = {} 916 917 def __init__( 918 self, 919 error_level: t.Optional[ErrorLevel] = None, 920 error_message_context: int = 100, 921 max_errors: int = 3, 922 ): 923 self.error_level = error_level or ErrorLevel.IMMEDIATE 924 self.error_message_context = error_message_context 925 self.max_errors = max_errors 926 self._tokenizer = self.TOKENIZER_CLASS() 927 self.reset() 928 929 def reset(self): 930 self.sql = "" 931 self.errors = [] 932 self._tokens = [] 933 self._index = 0 934 self._curr = None 935 self._next = None 936 self._prev = None 937 self._prev_comments = None 938 939 def parse( 940 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 941 ) -> t.List[t.Optional[exp.Expression]]: 942 """ 943 Parses a list of tokens and returns a list of syntax trees, one tree 944 per parsed SQL statement. 945 946 Args: 947 raw_tokens: The list of tokens. 948 sql: The original SQL string, used to produce helpful debug messages. 949 950 Returns: 951 The list of the produced syntax trees. 952 """ 953 return self._parse( 954 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 955 ) 956 957 def parse_into( 958 self, 959 expression_types: exp.IntoType, 960 raw_tokens: t.List[Token], 961 sql: t.Optional[str] = None, 962 ) -> t.List[t.Optional[exp.Expression]]: 963 """ 964 Parses a list of tokens into a given Expression type. If a collection of Expression 965 types is given instead, this method will try to parse the token list into each one 966 of them, stopping at the first for which the parsing succeeds. 967 968 Args: 969 expression_types: The expression type(s) to try and parse the token list into. 970 raw_tokens: The list of tokens. 971 sql: The original SQL string, used to produce helpful debug messages. 972 973 Returns: 974 The target Expression. 975 """ 976 errors = [] 977 for expression_type in ensure_list(expression_types): 978 parser = self.EXPRESSION_PARSERS.get(expression_type) 979 if not parser: 980 raise TypeError(f"No parser registered for {expression_type}") 981 982 try: 983 return self._parse(parser, raw_tokens, sql) 984 except ParseError as e: 985 e.errors[0]["into_expression"] = expression_type 986 errors.append(e) 987 988 raise ParseError( 989 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 990 errors=merge_errors(errors), 991 ) from errors[-1] 992 993 def _parse( 994 self, 995 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 996 raw_tokens: t.List[Token], 997 sql: t.Optional[str] = None, 998 ) -> t.List[t.Optional[exp.Expression]]: 999 self.reset() 1000 self.sql = sql or "" 1001 1002 total = len(raw_tokens) 1003 chunks: t.List[t.List[Token]] = [[]] 1004 1005 for i, token in enumerate(raw_tokens): 1006 if token.token_type == TokenType.SEMICOLON: 1007 if i < total - 1: 1008 chunks.append([]) 1009 else: 1010 chunks[-1].append(token) 1011 1012 expressions = [] 1013 1014 for tokens in chunks: 1015 self._index = -1 1016 self._tokens = tokens 1017 self._advance() 1018 1019 expressions.append(parse_method(self)) 1020 1021 if self._index < len(self._tokens): 1022 self.raise_error("Invalid expression / Unexpected token") 1023 1024 self.check_errors() 1025 1026 return expressions 1027 1028 def check_errors(self) -> None: 1029 """Logs or raises any found errors, depending on the chosen error level setting.""" 1030 if self.error_level == ErrorLevel.WARN: 1031 for error in self.errors: 1032 logger.error(str(error)) 1033 elif self.error_level == ErrorLevel.RAISE and self.errors: 1034 raise ParseError( 1035 concat_messages(self.errors, self.max_errors), 1036 errors=merge_errors(self.errors), 1037 ) 1038 1039 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1040 """ 1041 Appends an error in the list of recorded errors or raises it, depending on the chosen 1042 error level setting. 1043 """ 1044 token = token or self._curr or self._prev or Token.string("") 1045 start = token.start 1046 end = token.end + 1 1047 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1048 highlight = self.sql[start:end] 1049 end_context = self.sql[end : end + self.error_message_context] 1050 1051 error = ParseError.new( 1052 f"{message}. Line {token.line}, Col: {token.col}.\n" 1053 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1054 description=message, 1055 line=token.line, 1056 col=token.col, 1057 start_context=start_context, 1058 highlight=highlight, 1059 end_context=end_context, 1060 ) 1061 1062 if self.error_level == ErrorLevel.IMMEDIATE: 1063 raise error 1064 1065 self.errors.append(error) 1066 1067 def expression( 1068 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1069 ) -> E: 1070 """ 1071 Creates a new, validated Expression. 1072 1073 Args: 1074 exp_class: The expression class to instantiate. 1075 comments: An optional list of comments to attach to the expression. 1076 kwargs: The arguments to set for the expression along with their respective values. 1077 1078 Returns: 1079 The target expression. 1080 """ 1081 instance = exp_class(**kwargs) 1082 instance.add_comments(comments) if comments else self._add_comments(instance) 1083 return self.validate_expression(instance) 1084 1085 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1086 if expression and self._prev_comments: 1087 expression.add_comments(self._prev_comments) 1088 self._prev_comments = None 1089 1090 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1091 """ 1092 Validates an Expression, making sure that all its mandatory arguments are set. 1093 1094 Args: 1095 expression: The expression to validate. 1096 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1097 1098 Returns: 1099 The validated expression. 1100 """ 1101 if self.error_level != ErrorLevel.IGNORE: 1102 for error_message in expression.error_messages(args): 1103 self.raise_error(error_message) 1104 1105 return expression 1106 1107 def _find_sql(self, start: Token, end: Token) -> str: 1108 return self.sql[start.start : end.end + 1] 1109 1110 def _advance(self, times: int = 1) -> None: 1111 self._index += times 1112 self._curr = seq_get(self._tokens, self._index) 1113 self._next = seq_get(self._tokens, self._index + 1) 1114 1115 if self._index > 0: 1116 self._prev = self._tokens[self._index - 1] 1117 self._prev_comments = self._prev.comments 1118 else: 1119 self._prev = None 1120 self._prev_comments = None 1121 1122 def _retreat(self, index: int) -> None: 1123 if index != self._index: 1124 self._advance(index - self._index) 1125 1126 def _parse_command(self) -> exp.Command: 1127 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1128 1129 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1130 start = self._prev 1131 exists = self._parse_exists() if allow_exists else None 1132 1133 self._match(TokenType.ON) 1134 1135 kind = self._match_set(self.CREATABLES) and self._prev 1136 if not kind: 1137 return self._parse_as_command(start) 1138 1139 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1140 this = self._parse_user_defined_function(kind=kind.token_type) 1141 elif kind.token_type == TokenType.TABLE: 1142 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1143 elif kind.token_type == TokenType.COLUMN: 1144 this = self._parse_column() 1145 else: 1146 this = self._parse_id_var() 1147 1148 self._match(TokenType.IS) 1149 1150 return self.expression( 1151 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1152 ) 1153 1154 def _parse_to_table( 1155 self, 1156 ) -> exp.ToTableProperty: 1157 table = self._parse_table_parts(schema=True) 1158 return self.expression(exp.ToTableProperty, this=table) 1159 1160 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1161 def _parse_ttl(self) -> exp.Expression: 1162 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1163 this = self._parse_bitwise() 1164 1165 if self._match_text_seq("DELETE"): 1166 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1167 if self._match_text_seq("RECOMPRESS"): 1168 return self.expression( 1169 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1170 ) 1171 if self._match_text_seq("TO", "DISK"): 1172 return self.expression( 1173 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1174 ) 1175 if self._match_text_seq("TO", "VOLUME"): 1176 return self.expression( 1177 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1178 ) 1179 1180 return this 1181 1182 expressions = self._parse_csv(_parse_ttl_action) 1183 where = self._parse_where() 1184 group = self._parse_group() 1185 1186 aggregates = None 1187 if group and self._match(TokenType.SET): 1188 aggregates = self._parse_csv(self._parse_set_item) 1189 1190 return self.expression( 1191 exp.MergeTreeTTL, 1192 expressions=expressions, 1193 where=where, 1194 group=group, 1195 aggregates=aggregates, 1196 ) 1197 1198 def _parse_statement(self) -> t.Optional[exp.Expression]: 1199 if self._curr is None: 1200 return None 1201 1202 if self._match_set(self.STATEMENT_PARSERS): 1203 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1204 1205 if self._match_set(Tokenizer.COMMANDS): 1206 return self._parse_command() 1207 1208 expression = self._parse_expression() 1209 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1210 return self._parse_query_modifiers(expression) 1211 1212 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1213 start = self._prev 1214 temporary = self._match(TokenType.TEMPORARY) 1215 materialized = self._match_text_seq("MATERIALIZED") 1216 1217 kind = self._match_set(self.CREATABLES) and self._prev.text 1218 if not kind: 1219 return self._parse_as_command(start) 1220 1221 return self.expression( 1222 exp.Drop, 1223 comments=start.comments, 1224 exists=exists or self._parse_exists(), 1225 this=self._parse_table(schema=True), 1226 kind=kind, 1227 temporary=temporary, 1228 materialized=materialized, 1229 cascade=self._match_text_seq("CASCADE"), 1230 constraints=self._match_text_seq("CONSTRAINTS"), 1231 purge=self._match_text_seq("PURGE"), 1232 ) 1233 1234 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1235 return ( 1236 self._match_text_seq("IF") 1237 and (not not_ or self._match(TokenType.NOT)) 1238 and self._match(TokenType.EXISTS) 1239 ) 1240 1241 def _parse_create(self) -> exp.Create | exp.Command: 1242 # Note: this can't be None because we've matched a statement parser 1243 start = self._prev 1244 comments = self._prev_comments 1245 1246 replace = start.text.upper() == "REPLACE" or self._match_pair( 1247 TokenType.OR, TokenType.REPLACE 1248 ) 1249 unique = self._match(TokenType.UNIQUE) 1250 1251 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1252 self._advance() 1253 1254 properties = None 1255 create_token = self._match_set(self.CREATABLES) and self._prev 1256 1257 if not create_token: 1258 # exp.Properties.Location.POST_CREATE 1259 properties = self._parse_properties() 1260 create_token = self._match_set(self.CREATABLES) and self._prev 1261 1262 if not properties or not create_token: 1263 return self._parse_as_command(start) 1264 1265 exists = self._parse_exists(not_=True) 1266 this = None 1267 expression: t.Optional[exp.Expression] = None 1268 indexes = None 1269 no_schema_binding = None 1270 begin = None 1271 clone = None 1272 1273 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1274 nonlocal properties 1275 if properties and temp_props: 1276 properties.expressions.extend(temp_props.expressions) 1277 elif temp_props: 1278 properties = temp_props 1279 1280 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1281 this = self._parse_user_defined_function(kind=create_token.token_type) 1282 1283 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1284 extend_props(self._parse_properties()) 1285 1286 self._match(TokenType.ALIAS) 1287 1288 if self._match(TokenType.COMMAND): 1289 expression = self._parse_as_command(self._prev) 1290 else: 1291 begin = self._match(TokenType.BEGIN) 1292 return_ = self._match_text_seq("RETURN") 1293 1294 if self._match(TokenType.STRING, advance=False): 1295 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1296 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1297 expression = self._parse_string() 1298 extend_props(self._parse_properties()) 1299 else: 1300 expression = self._parse_statement() 1301 1302 if return_: 1303 expression = self.expression(exp.Return, this=expression) 1304 elif create_token.token_type == TokenType.INDEX: 1305 this = self._parse_index(index=self._parse_id_var()) 1306 elif create_token.token_type in self.DB_CREATABLES: 1307 table_parts = self._parse_table_parts(schema=True) 1308 1309 # exp.Properties.Location.POST_NAME 1310 self._match(TokenType.COMMA) 1311 extend_props(self._parse_properties(before=True)) 1312 1313 this = self._parse_schema(this=table_parts) 1314 1315 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1316 extend_props(self._parse_properties()) 1317 1318 self._match(TokenType.ALIAS) 1319 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1320 # exp.Properties.Location.POST_ALIAS 1321 extend_props(self._parse_properties()) 1322 1323 expression = self._parse_ddl_select() 1324 1325 if create_token.token_type == TokenType.TABLE: 1326 # exp.Properties.Location.POST_EXPRESSION 1327 extend_props(self._parse_properties()) 1328 1329 indexes = [] 1330 while True: 1331 index = self._parse_index() 1332 1333 # exp.Properties.Location.POST_INDEX 1334 extend_props(self._parse_properties()) 1335 1336 if not index: 1337 break 1338 else: 1339 self._match(TokenType.COMMA) 1340 indexes.append(index) 1341 elif create_token.token_type == TokenType.VIEW: 1342 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1343 no_schema_binding = True 1344 1345 shallow = self._match_text_seq("SHALLOW") 1346 1347 if self._match_text_seq("CLONE"): 1348 clone = self._parse_table(schema=True) 1349 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1350 clone_kind = ( 1351 self._match(TokenType.L_PAREN) 1352 and self._match_texts(self.CLONE_KINDS) 1353 and self._prev.text.upper() 1354 ) 1355 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1356 self._match(TokenType.R_PAREN) 1357 clone = self.expression( 1358 exp.Clone, 1359 this=clone, 1360 when=when, 1361 kind=clone_kind, 1362 shallow=shallow, 1363 expression=clone_expression, 1364 ) 1365 1366 return self.expression( 1367 exp.Create, 1368 comments=comments, 1369 this=this, 1370 kind=create_token.text, 1371 replace=replace, 1372 unique=unique, 1373 expression=expression, 1374 exists=exists, 1375 properties=properties, 1376 indexes=indexes, 1377 no_schema_binding=no_schema_binding, 1378 begin=begin, 1379 clone=clone, 1380 ) 1381 1382 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1383 # only used for teradata currently 1384 self._match(TokenType.COMMA) 1385 1386 kwargs = { 1387 "no": self._match_text_seq("NO"), 1388 "dual": self._match_text_seq("DUAL"), 1389 "before": self._match_text_seq("BEFORE"), 1390 "default": self._match_text_seq("DEFAULT"), 1391 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1392 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1393 "after": self._match_text_seq("AFTER"), 1394 "minimum": self._match_texts(("MIN", "MINIMUM")), 1395 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1396 } 1397 1398 if self._match_texts(self.PROPERTY_PARSERS): 1399 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1400 try: 1401 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1402 except TypeError: 1403 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1404 1405 return None 1406 1407 def _parse_property(self) -> t.Optional[exp.Expression]: 1408 if self._match_texts(self.PROPERTY_PARSERS): 1409 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1410 1411 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1412 return self._parse_character_set(default=True) 1413 1414 if self._match_text_seq("COMPOUND", "SORTKEY"): 1415 return self._parse_sortkey(compound=True) 1416 1417 if self._match_text_seq("SQL", "SECURITY"): 1418 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1419 1420 index = self._index 1421 key = self._parse_column() 1422 1423 if not self._match(TokenType.EQ): 1424 self._retreat(index) 1425 return None 1426 1427 return self.expression( 1428 exp.Property, 1429 this=key.to_dot() if isinstance(key, exp.Column) else key, 1430 value=self._parse_column() or self._parse_var(any_token=True), 1431 ) 1432 1433 def _parse_stored(self) -> exp.FileFormatProperty: 1434 self._match(TokenType.ALIAS) 1435 1436 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1437 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1438 1439 return self.expression( 1440 exp.FileFormatProperty, 1441 this=self.expression( 1442 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1443 ) 1444 if input_format or output_format 1445 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1446 ) 1447 1448 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1449 self._match(TokenType.EQ) 1450 self._match(TokenType.ALIAS) 1451 return self.expression(exp_class, this=self._parse_field()) 1452 1453 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1454 properties = [] 1455 while True: 1456 if before: 1457 prop = self._parse_property_before() 1458 else: 1459 prop = self._parse_property() 1460 1461 if not prop: 1462 break 1463 for p in ensure_list(prop): 1464 properties.append(p) 1465 1466 if properties: 1467 return self.expression(exp.Properties, expressions=properties) 1468 1469 return None 1470 1471 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1472 return self.expression( 1473 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1474 ) 1475 1476 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1477 if self._index >= 2: 1478 pre_volatile_token = self._tokens[self._index - 2] 1479 else: 1480 pre_volatile_token = None 1481 1482 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1483 return exp.VolatileProperty() 1484 1485 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1486 1487 def _parse_with_property( 1488 self, 1489 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1490 if self._match(TokenType.L_PAREN, advance=False): 1491 return self._parse_wrapped_csv(self._parse_property) 1492 1493 if self._match_text_seq("JOURNAL"): 1494 return self._parse_withjournaltable() 1495 1496 if self._match_text_seq("DATA"): 1497 return self._parse_withdata(no=False) 1498 elif self._match_text_seq("NO", "DATA"): 1499 return self._parse_withdata(no=True) 1500 1501 if not self._next: 1502 return None 1503 1504 return self._parse_withisolatedloading() 1505 1506 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1507 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1508 self._match(TokenType.EQ) 1509 1510 user = self._parse_id_var() 1511 self._match(TokenType.PARAMETER) 1512 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1513 1514 if not user or not host: 1515 return None 1516 1517 return exp.DefinerProperty(this=f"{user}@{host}") 1518 1519 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1520 self._match(TokenType.TABLE) 1521 self._match(TokenType.EQ) 1522 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1523 1524 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1525 return self.expression(exp.LogProperty, no=no) 1526 1527 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1528 return self.expression(exp.JournalProperty, **kwargs) 1529 1530 def _parse_checksum(self) -> exp.ChecksumProperty: 1531 self._match(TokenType.EQ) 1532 1533 on = None 1534 if self._match(TokenType.ON): 1535 on = True 1536 elif self._match_text_seq("OFF"): 1537 on = False 1538 1539 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1540 1541 def _parse_cluster(self) -> exp.Cluster: 1542 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1543 1544 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1545 self._match_text_seq("BY") 1546 1547 self._match_l_paren() 1548 expressions = self._parse_csv(self._parse_column) 1549 self._match_r_paren() 1550 1551 if self._match_text_seq("SORTED", "BY"): 1552 self._match_l_paren() 1553 sorted_by = self._parse_csv(self._parse_ordered) 1554 self._match_r_paren() 1555 else: 1556 sorted_by = None 1557 1558 self._match(TokenType.INTO) 1559 buckets = self._parse_number() 1560 self._match_text_seq("BUCKETS") 1561 1562 return self.expression( 1563 exp.ClusteredByProperty, 1564 expressions=expressions, 1565 sorted_by=sorted_by, 1566 buckets=buckets, 1567 ) 1568 1569 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1570 if not self._match_text_seq("GRANTS"): 1571 self._retreat(self._index - 1) 1572 return None 1573 1574 return self.expression(exp.CopyGrantsProperty) 1575 1576 def _parse_freespace(self) -> exp.FreespaceProperty: 1577 self._match(TokenType.EQ) 1578 return self.expression( 1579 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1580 ) 1581 1582 def _parse_mergeblockratio( 1583 self, no: bool = False, default: bool = False 1584 ) -> exp.MergeBlockRatioProperty: 1585 if self._match(TokenType.EQ): 1586 return self.expression( 1587 exp.MergeBlockRatioProperty, 1588 this=self._parse_number(), 1589 percent=self._match(TokenType.PERCENT), 1590 ) 1591 1592 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1593 1594 def _parse_datablocksize( 1595 self, 1596 default: t.Optional[bool] = None, 1597 minimum: t.Optional[bool] = None, 1598 maximum: t.Optional[bool] = None, 1599 ) -> exp.DataBlocksizeProperty: 1600 self._match(TokenType.EQ) 1601 size = self._parse_number() 1602 1603 units = None 1604 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1605 units = self._prev.text 1606 1607 return self.expression( 1608 exp.DataBlocksizeProperty, 1609 size=size, 1610 units=units, 1611 default=default, 1612 minimum=minimum, 1613 maximum=maximum, 1614 ) 1615 1616 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1617 self._match(TokenType.EQ) 1618 always = self._match_text_seq("ALWAYS") 1619 manual = self._match_text_seq("MANUAL") 1620 never = self._match_text_seq("NEVER") 1621 default = self._match_text_seq("DEFAULT") 1622 1623 autotemp = None 1624 if self._match_text_seq("AUTOTEMP"): 1625 autotemp = self._parse_schema() 1626 1627 return self.expression( 1628 exp.BlockCompressionProperty, 1629 always=always, 1630 manual=manual, 1631 never=never, 1632 default=default, 1633 autotemp=autotemp, 1634 ) 1635 1636 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1637 no = self._match_text_seq("NO") 1638 concurrent = self._match_text_seq("CONCURRENT") 1639 self._match_text_seq("ISOLATED", "LOADING") 1640 for_all = self._match_text_seq("FOR", "ALL") 1641 for_insert = self._match_text_seq("FOR", "INSERT") 1642 for_none = self._match_text_seq("FOR", "NONE") 1643 return self.expression( 1644 exp.IsolatedLoadingProperty, 1645 no=no, 1646 concurrent=concurrent, 1647 for_all=for_all, 1648 for_insert=for_insert, 1649 for_none=for_none, 1650 ) 1651 1652 def _parse_locking(self) -> exp.LockingProperty: 1653 if self._match(TokenType.TABLE): 1654 kind = "TABLE" 1655 elif self._match(TokenType.VIEW): 1656 kind = "VIEW" 1657 elif self._match(TokenType.ROW): 1658 kind = "ROW" 1659 elif self._match_text_seq("DATABASE"): 1660 kind = "DATABASE" 1661 else: 1662 kind = None 1663 1664 if kind in ("DATABASE", "TABLE", "VIEW"): 1665 this = self._parse_table_parts() 1666 else: 1667 this = None 1668 1669 if self._match(TokenType.FOR): 1670 for_or_in = "FOR" 1671 elif self._match(TokenType.IN): 1672 for_or_in = "IN" 1673 else: 1674 for_or_in = None 1675 1676 if self._match_text_seq("ACCESS"): 1677 lock_type = "ACCESS" 1678 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1679 lock_type = "EXCLUSIVE" 1680 elif self._match_text_seq("SHARE"): 1681 lock_type = "SHARE" 1682 elif self._match_text_seq("READ"): 1683 lock_type = "READ" 1684 elif self._match_text_seq("WRITE"): 1685 lock_type = "WRITE" 1686 elif self._match_text_seq("CHECKSUM"): 1687 lock_type = "CHECKSUM" 1688 else: 1689 lock_type = None 1690 1691 override = self._match_text_seq("OVERRIDE") 1692 1693 return self.expression( 1694 exp.LockingProperty, 1695 this=this, 1696 kind=kind, 1697 for_or_in=for_or_in, 1698 lock_type=lock_type, 1699 override=override, 1700 ) 1701 1702 def _parse_partition_by(self) -> t.List[exp.Expression]: 1703 if self._match(TokenType.PARTITION_BY): 1704 return self._parse_csv(self._parse_conjunction) 1705 return [] 1706 1707 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1708 self._match(TokenType.EQ) 1709 return self.expression( 1710 exp.PartitionedByProperty, 1711 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1712 ) 1713 1714 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1715 if self._match_text_seq("AND", "STATISTICS"): 1716 statistics = True 1717 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1718 statistics = False 1719 else: 1720 statistics = None 1721 1722 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1723 1724 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1725 if self._match_text_seq("PRIMARY", "INDEX"): 1726 return exp.NoPrimaryIndexProperty() 1727 return None 1728 1729 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1730 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1731 return exp.OnCommitProperty() 1732 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1733 return exp.OnCommitProperty(delete=True) 1734 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1735 1736 def _parse_distkey(self) -> exp.DistKeyProperty: 1737 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1738 1739 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1740 table = self._parse_table(schema=True) 1741 1742 options = [] 1743 while self._match_texts(("INCLUDING", "EXCLUDING")): 1744 this = self._prev.text.upper() 1745 1746 id_var = self._parse_id_var() 1747 if not id_var: 1748 return None 1749 1750 options.append( 1751 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1752 ) 1753 1754 return self.expression(exp.LikeProperty, this=table, expressions=options) 1755 1756 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1757 return self.expression( 1758 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1759 ) 1760 1761 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1762 self._match(TokenType.EQ) 1763 return self.expression( 1764 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1765 ) 1766 1767 def _parse_returns(self) -> exp.ReturnsProperty: 1768 value: t.Optional[exp.Expression] 1769 is_table = self._match(TokenType.TABLE) 1770 1771 if is_table: 1772 if self._match(TokenType.LT): 1773 value = self.expression( 1774 exp.Schema, 1775 this="TABLE", 1776 expressions=self._parse_csv(self._parse_struct_types), 1777 ) 1778 if not self._match(TokenType.GT): 1779 self.raise_error("Expecting >") 1780 else: 1781 value = self._parse_schema(exp.var("TABLE")) 1782 else: 1783 value = self._parse_types() 1784 1785 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1786 1787 def _parse_describe(self) -> exp.Describe: 1788 kind = self._match_set(self.CREATABLES) and self._prev.text 1789 this = self._parse_table(schema=True) 1790 properties = self._parse_properties() 1791 expressions = properties.expressions if properties else None 1792 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1793 1794 def _parse_insert(self) -> exp.Insert: 1795 comments = ensure_list(self._prev_comments) 1796 overwrite = self._match(TokenType.OVERWRITE) 1797 ignore = self._match(TokenType.IGNORE) 1798 local = self._match_text_seq("LOCAL") 1799 alternative = None 1800 1801 if self._match_text_seq("DIRECTORY"): 1802 this: t.Optional[exp.Expression] = self.expression( 1803 exp.Directory, 1804 this=self._parse_var_or_string(), 1805 local=local, 1806 row_format=self._parse_row_format(match_row=True), 1807 ) 1808 else: 1809 if self._match(TokenType.OR): 1810 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1811 1812 self._match(TokenType.INTO) 1813 comments += ensure_list(self._prev_comments) 1814 self._match(TokenType.TABLE) 1815 this = self._parse_table(schema=True) 1816 1817 returning = self._parse_returning() 1818 1819 return self.expression( 1820 exp.Insert, 1821 comments=comments, 1822 this=this, 1823 by_name=self._match_text_seq("BY", "NAME"), 1824 exists=self._parse_exists(), 1825 partition=self._parse_partition(), 1826 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1827 and self._parse_conjunction(), 1828 expression=self._parse_ddl_select(), 1829 conflict=self._parse_on_conflict(), 1830 returning=returning or self._parse_returning(), 1831 overwrite=overwrite, 1832 alternative=alternative, 1833 ignore=ignore, 1834 ) 1835 1836 def _parse_kill(self) -> exp.Kill: 1837 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1838 1839 return self.expression( 1840 exp.Kill, 1841 this=self._parse_primary(), 1842 kind=kind, 1843 ) 1844 1845 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1846 conflict = self._match_text_seq("ON", "CONFLICT") 1847 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1848 1849 if not conflict and not duplicate: 1850 return None 1851 1852 nothing = None 1853 expressions = None 1854 key = None 1855 constraint = None 1856 1857 if conflict: 1858 if self._match_text_seq("ON", "CONSTRAINT"): 1859 constraint = self._parse_id_var() 1860 else: 1861 key = self._parse_csv(self._parse_value) 1862 1863 self._match_text_seq("DO") 1864 if self._match_text_seq("NOTHING"): 1865 nothing = True 1866 else: 1867 self._match(TokenType.UPDATE) 1868 self._match(TokenType.SET) 1869 expressions = self._parse_csv(self._parse_equality) 1870 1871 return self.expression( 1872 exp.OnConflict, 1873 duplicate=duplicate, 1874 expressions=expressions, 1875 nothing=nothing, 1876 key=key, 1877 constraint=constraint, 1878 ) 1879 1880 def _parse_returning(self) -> t.Optional[exp.Returning]: 1881 if not self._match(TokenType.RETURNING): 1882 return None 1883 return self.expression( 1884 exp.Returning, 1885 expressions=self._parse_csv(self._parse_expression), 1886 into=self._match(TokenType.INTO) and self._parse_table_part(), 1887 ) 1888 1889 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1890 if not self._match(TokenType.FORMAT): 1891 return None 1892 return self._parse_row_format() 1893 1894 def _parse_row_format( 1895 self, match_row: bool = False 1896 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1897 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1898 return None 1899 1900 if self._match_text_seq("SERDE"): 1901 this = self._parse_string() 1902 1903 serde_properties = None 1904 if self._match(TokenType.SERDE_PROPERTIES): 1905 serde_properties = self.expression( 1906 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1907 ) 1908 1909 return self.expression( 1910 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1911 ) 1912 1913 self._match_text_seq("DELIMITED") 1914 1915 kwargs = {} 1916 1917 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1918 kwargs["fields"] = self._parse_string() 1919 if self._match_text_seq("ESCAPED", "BY"): 1920 kwargs["escaped"] = self._parse_string() 1921 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1922 kwargs["collection_items"] = self._parse_string() 1923 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1924 kwargs["map_keys"] = self._parse_string() 1925 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1926 kwargs["lines"] = self._parse_string() 1927 if self._match_text_seq("NULL", "DEFINED", "AS"): 1928 kwargs["null"] = self._parse_string() 1929 1930 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1931 1932 def _parse_load(self) -> exp.LoadData | exp.Command: 1933 if self._match_text_seq("DATA"): 1934 local = self._match_text_seq("LOCAL") 1935 self._match_text_seq("INPATH") 1936 inpath = self._parse_string() 1937 overwrite = self._match(TokenType.OVERWRITE) 1938 self._match_pair(TokenType.INTO, TokenType.TABLE) 1939 1940 return self.expression( 1941 exp.LoadData, 1942 this=self._parse_table(schema=True), 1943 local=local, 1944 overwrite=overwrite, 1945 inpath=inpath, 1946 partition=self._parse_partition(), 1947 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1948 serde=self._match_text_seq("SERDE") and self._parse_string(), 1949 ) 1950 return self._parse_as_command(self._prev) 1951 1952 def _parse_delete(self) -> exp.Delete: 1953 # This handles MySQL's "Multiple-Table Syntax" 1954 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1955 tables = None 1956 comments = self._prev_comments 1957 if not self._match(TokenType.FROM, advance=False): 1958 tables = self._parse_csv(self._parse_table) or None 1959 1960 returning = self._parse_returning() 1961 1962 return self.expression( 1963 exp.Delete, 1964 comments=comments, 1965 tables=tables, 1966 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1967 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1968 where=self._parse_where(), 1969 returning=returning or self._parse_returning(), 1970 limit=self._parse_limit(), 1971 ) 1972 1973 def _parse_update(self) -> exp.Update: 1974 comments = self._prev_comments 1975 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 1976 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1977 returning = self._parse_returning() 1978 return self.expression( 1979 exp.Update, 1980 comments=comments, 1981 **{ # type: ignore 1982 "this": this, 1983 "expressions": expressions, 1984 "from": self._parse_from(joins=True), 1985 "where": self._parse_where(), 1986 "returning": returning or self._parse_returning(), 1987 "order": self._parse_order(), 1988 "limit": self._parse_limit(), 1989 }, 1990 ) 1991 1992 def _parse_uncache(self) -> exp.Uncache: 1993 if not self._match(TokenType.TABLE): 1994 self.raise_error("Expecting TABLE after UNCACHE") 1995 1996 return self.expression( 1997 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1998 ) 1999 2000 def _parse_cache(self) -> exp.Cache: 2001 lazy = self._match_text_seq("LAZY") 2002 self._match(TokenType.TABLE) 2003 table = self._parse_table(schema=True) 2004 2005 options = [] 2006 if self._match_text_seq("OPTIONS"): 2007 self._match_l_paren() 2008 k = self._parse_string() 2009 self._match(TokenType.EQ) 2010 v = self._parse_string() 2011 options = [k, v] 2012 self._match_r_paren() 2013 2014 self._match(TokenType.ALIAS) 2015 return self.expression( 2016 exp.Cache, 2017 this=table, 2018 lazy=lazy, 2019 options=options, 2020 expression=self._parse_select(nested=True), 2021 ) 2022 2023 def _parse_partition(self) -> t.Optional[exp.Partition]: 2024 if not self._match(TokenType.PARTITION): 2025 return None 2026 2027 return self.expression( 2028 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2029 ) 2030 2031 def _parse_value(self) -> exp.Tuple: 2032 if self._match(TokenType.L_PAREN): 2033 expressions = self._parse_csv(self._parse_conjunction) 2034 self._match_r_paren() 2035 return self.expression(exp.Tuple, expressions=expressions) 2036 2037 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2038 # https://prestodb.io/docs/current/sql/values.html 2039 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2040 2041 def _parse_projections(self) -> t.List[exp.Expression]: 2042 return self._parse_expressions() 2043 2044 def _parse_select( 2045 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2046 ) -> t.Optional[exp.Expression]: 2047 cte = self._parse_with() 2048 2049 if cte: 2050 this = self._parse_statement() 2051 2052 if not this: 2053 self.raise_error("Failed to parse any statement following CTE") 2054 return cte 2055 2056 if "with" in this.arg_types: 2057 this.set("with", cte) 2058 else: 2059 self.raise_error(f"{this.key} does not support CTE") 2060 this = cte 2061 2062 return this 2063 2064 # duckdb supports leading with FROM x 2065 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2066 2067 if self._match(TokenType.SELECT): 2068 comments = self._prev_comments 2069 2070 hint = self._parse_hint() 2071 all_ = self._match(TokenType.ALL) 2072 distinct = self._match_set(self.DISTINCT_TOKENS) 2073 2074 kind = ( 2075 self._match(TokenType.ALIAS) 2076 and self._match_texts(("STRUCT", "VALUE")) 2077 and self._prev.text 2078 ) 2079 2080 if distinct: 2081 distinct = self.expression( 2082 exp.Distinct, 2083 on=self._parse_value() if self._match(TokenType.ON) else None, 2084 ) 2085 2086 if all_ and distinct: 2087 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2088 2089 limit = self._parse_limit(top=True) 2090 projections = self._parse_projections() 2091 2092 this = self.expression( 2093 exp.Select, 2094 kind=kind, 2095 hint=hint, 2096 distinct=distinct, 2097 expressions=projections, 2098 limit=limit, 2099 ) 2100 this.comments = comments 2101 2102 into = self._parse_into() 2103 if into: 2104 this.set("into", into) 2105 2106 if not from_: 2107 from_ = self._parse_from() 2108 2109 if from_: 2110 this.set("from", from_) 2111 2112 this = self._parse_query_modifiers(this) 2113 elif (table or nested) and self._match(TokenType.L_PAREN): 2114 if self._match(TokenType.PIVOT): 2115 this = self._parse_simplified_pivot() 2116 elif self._match(TokenType.FROM): 2117 this = exp.select("*").from_( 2118 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2119 ) 2120 else: 2121 this = self._parse_table() if table else self._parse_select(nested=True) 2122 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2123 2124 self._match_r_paren() 2125 2126 # We return early here so that the UNION isn't attached to the subquery by the 2127 # following call to _parse_set_operations, but instead becomes the parent node 2128 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2129 elif self._match(TokenType.VALUES): 2130 this = self.expression( 2131 exp.Values, 2132 expressions=self._parse_csv(self._parse_value), 2133 alias=self._parse_table_alias(), 2134 ) 2135 elif from_: 2136 this = exp.select("*").from_(from_.this, copy=False) 2137 else: 2138 this = None 2139 2140 return self._parse_set_operations(this) 2141 2142 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2143 if not skip_with_token and not self._match(TokenType.WITH): 2144 return None 2145 2146 comments = self._prev_comments 2147 recursive = self._match(TokenType.RECURSIVE) 2148 2149 expressions = [] 2150 while True: 2151 expressions.append(self._parse_cte()) 2152 2153 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2154 break 2155 else: 2156 self._match(TokenType.WITH) 2157 2158 return self.expression( 2159 exp.With, comments=comments, expressions=expressions, recursive=recursive 2160 ) 2161 2162 def _parse_cte(self) -> exp.CTE: 2163 alias = self._parse_table_alias() 2164 if not alias or not alias.this: 2165 self.raise_error("Expected CTE to have alias") 2166 2167 self._match(TokenType.ALIAS) 2168 return self.expression( 2169 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2170 ) 2171 2172 def _parse_table_alias( 2173 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2174 ) -> t.Optional[exp.TableAlias]: 2175 any_token = self._match(TokenType.ALIAS) 2176 alias = ( 2177 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2178 or self._parse_string_as_identifier() 2179 ) 2180 2181 index = self._index 2182 if self._match(TokenType.L_PAREN): 2183 columns = self._parse_csv(self._parse_function_parameter) 2184 self._match_r_paren() if columns else self._retreat(index) 2185 else: 2186 columns = None 2187 2188 if not alias and not columns: 2189 return None 2190 2191 return self.expression(exp.TableAlias, this=alias, columns=columns) 2192 2193 def _parse_subquery( 2194 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2195 ) -> t.Optional[exp.Subquery]: 2196 if not this: 2197 return None 2198 2199 return self.expression( 2200 exp.Subquery, 2201 this=this, 2202 pivots=self._parse_pivots(), 2203 alias=self._parse_table_alias() if parse_alias else None, 2204 ) 2205 2206 def _parse_query_modifiers( 2207 self, this: t.Optional[exp.Expression] 2208 ) -> t.Optional[exp.Expression]: 2209 if isinstance(this, self.MODIFIABLES): 2210 for join in iter(self._parse_join, None): 2211 this.append("joins", join) 2212 for lateral in iter(self._parse_lateral, None): 2213 this.append("laterals", lateral) 2214 2215 while True: 2216 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2217 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2218 key, expression = parser(self) 2219 2220 if expression: 2221 this.set(key, expression) 2222 if key == "limit": 2223 offset = expression.args.pop("offset", None) 2224 if offset: 2225 this.set("offset", exp.Offset(expression=offset)) 2226 continue 2227 break 2228 return this 2229 2230 def _parse_hint(self) -> t.Optional[exp.Hint]: 2231 if self._match(TokenType.HINT): 2232 hints = [] 2233 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2234 hints.extend(hint) 2235 2236 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2237 self.raise_error("Expected */ after HINT") 2238 2239 return self.expression(exp.Hint, expressions=hints) 2240 2241 return None 2242 2243 def _parse_into(self) -> t.Optional[exp.Into]: 2244 if not self._match(TokenType.INTO): 2245 return None 2246 2247 temp = self._match(TokenType.TEMPORARY) 2248 unlogged = self._match_text_seq("UNLOGGED") 2249 self._match(TokenType.TABLE) 2250 2251 return self.expression( 2252 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2253 ) 2254 2255 def _parse_from( 2256 self, joins: bool = False, skip_from_token: bool = False 2257 ) -> t.Optional[exp.From]: 2258 if not skip_from_token and not self._match(TokenType.FROM): 2259 return None 2260 2261 return self.expression( 2262 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2263 ) 2264 2265 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2266 if not self._match(TokenType.MATCH_RECOGNIZE): 2267 return None 2268 2269 self._match_l_paren() 2270 2271 partition = self._parse_partition_by() 2272 order = self._parse_order() 2273 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2274 2275 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2276 rows = exp.var("ONE ROW PER MATCH") 2277 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2278 text = "ALL ROWS PER MATCH" 2279 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2280 text += f" SHOW EMPTY MATCHES" 2281 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2282 text += f" OMIT EMPTY MATCHES" 2283 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2284 text += f" WITH UNMATCHED ROWS" 2285 rows = exp.var(text) 2286 else: 2287 rows = None 2288 2289 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2290 text = "AFTER MATCH SKIP" 2291 if self._match_text_seq("PAST", "LAST", "ROW"): 2292 text += f" PAST LAST ROW" 2293 elif self._match_text_seq("TO", "NEXT", "ROW"): 2294 text += f" TO NEXT ROW" 2295 elif self._match_text_seq("TO", "FIRST"): 2296 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2297 elif self._match_text_seq("TO", "LAST"): 2298 text += f" TO LAST {self._advance_any().text}" # type: ignore 2299 after = exp.var(text) 2300 else: 2301 after = None 2302 2303 if self._match_text_seq("PATTERN"): 2304 self._match_l_paren() 2305 2306 if not self._curr: 2307 self.raise_error("Expecting )", self._curr) 2308 2309 paren = 1 2310 start = self._curr 2311 2312 while self._curr and paren > 0: 2313 if self._curr.token_type == TokenType.L_PAREN: 2314 paren += 1 2315 if self._curr.token_type == TokenType.R_PAREN: 2316 paren -= 1 2317 2318 end = self._prev 2319 self._advance() 2320 2321 if paren > 0: 2322 self.raise_error("Expecting )", self._curr) 2323 2324 pattern = exp.var(self._find_sql(start, end)) 2325 else: 2326 pattern = None 2327 2328 define = ( 2329 self._parse_csv( 2330 lambda: self.expression( 2331 exp.Alias, 2332 alias=self._parse_id_var(any_token=True), 2333 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2334 ) 2335 ) 2336 if self._match_text_seq("DEFINE") 2337 else None 2338 ) 2339 2340 self._match_r_paren() 2341 2342 return self.expression( 2343 exp.MatchRecognize, 2344 partition_by=partition, 2345 order=order, 2346 measures=measures, 2347 rows=rows, 2348 after=after, 2349 pattern=pattern, 2350 define=define, 2351 alias=self._parse_table_alias(), 2352 ) 2353 2354 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2355 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2356 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2357 2358 if outer_apply or cross_apply: 2359 this = self._parse_select(table=True) 2360 view = None 2361 outer = not cross_apply 2362 elif self._match(TokenType.LATERAL): 2363 this = self._parse_select(table=True) 2364 view = self._match(TokenType.VIEW) 2365 outer = self._match(TokenType.OUTER) 2366 else: 2367 return None 2368 2369 if not this: 2370 this = ( 2371 self._parse_unnest() 2372 or self._parse_function() 2373 or self._parse_id_var(any_token=False) 2374 ) 2375 2376 while self._match(TokenType.DOT): 2377 this = exp.Dot( 2378 this=this, 2379 expression=self._parse_function() or self._parse_id_var(any_token=False), 2380 ) 2381 2382 if view: 2383 table = self._parse_id_var(any_token=False) 2384 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2385 table_alias: t.Optional[exp.TableAlias] = self.expression( 2386 exp.TableAlias, this=table, columns=columns 2387 ) 2388 elif isinstance(this, exp.Subquery) and this.alias: 2389 # Ensures parity between the Subquery's and the Lateral's "alias" args 2390 table_alias = this.args["alias"].copy() 2391 else: 2392 table_alias = self._parse_table_alias() 2393 2394 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2395 2396 def _parse_join_parts( 2397 self, 2398 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2399 return ( 2400 self._match_set(self.JOIN_METHODS) and self._prev, 2401 self._match_set(self.JOIN_SIDES) and self._prev, 2402 self._match_set(self.JOIN_KINDS) and self._prev, 2403 ) 2404 2405 def _parse_join( 2406 self, skip_join_token: bool = False, parse_bracket: bool = False 2407 ) -> t.Optional[exp.Join]: 2408 if self._match(TokenType.COMMA): 2409 return self.expression(exp.Join, this=self._parse_table()) 2410 2411 index = self._index 2412 method, side, kind = self._parse_join_parts() 2413 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2414 join = self._match(TokenType.JOIN) 2415 2416 if not skip_join_token and not join: 2417 self._retreat(index) 2418 kind = None 2419 method = None 2420 side = None 2421 2422 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2423 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2424 2425 if not skip_join_token and not join and not outer_apply and not cross_apply: 2426 return None 2427 2428 if outer_apply: 2429 side = Token(TokenType.LEFT, "LEFT") 2430 2431 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2432 2433 if method: 2434 kwargs["method"] = method.text 2435 if side: 2436 kwargs["side"] = side.text 2437 if kind: 2438 kwargs["kind"] = kind.text 2439 if hint: 2440 kwargs["hint"] = hint 2441 2442 if self._match(TokenType.ON): 2443 kwargs["on"] = self._parse_conjunction() 2444 elif self._match(TokenType.USING): 2445 kwargs["using"] = self._parse_wrapped_id_vars() 2446 elif not (kind and kind.token_type == TokenType.CROSS): 2447 index = self._index 2448 joins = self._parse_joins() 2449 2450 if joins and self._match(TokenType.ON): 2451 kwargs["on"] = self._parse_conjunction() 2452 elif joins and self._match(TokenType.USING): 2453 kwargs["using"] = self._parse_wrapped_id_vars() 2454 else: 2455 joins = None 2456 self._retreat(index) 2457 2458 kwargs["this"].set("joins", joins) 2459 2460 comments = [c for token in (method, side, kind) if token for c in token.comments] 2461 return self.expression(exp.Join, comments=comments, **kwargs) 2462 2463 def _parse_index( 2464 self, 2465 index: t.Optional[exp.Expression] = None, 2466 ) -> t.Optional[exp.Index]: 2467 if index: 2468 unique = None 2469 primary = None 2470 amp = None 2471 2472 self._match(TokenType.ON) 2473 self._match(TokenType.TABLE) # hive 2474 table = self._parse_table_parts(schema=True) 2475 else: 2476 unique = self._match(TokenType.UNIQUE) 2477 primary = self._match_text_seq("PRIMARY") 2478 amp = self._match_text_seq("AMP") 2479 2480 if not self._match(TokenType.INDEX): 2481 return None 2482 2483 index = self._parse_id_var() 2484 table = None 2485 2486 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2487 2488 if self._match(TokenType.L_PAREN, advance=False): 2489 columns = self._parse_wrapped_csv(self._parse_ordered) 2490 else: 2491 columns = None 2492 2493 return self.expression( 2494 exp.Index, 2495 this=index, 2496 table=table, 2497 using=using, 2498 columns=columns, 2499 unique=unique, 2500 primary=primary, 2501 amp=amp, 2502 partition_by=self._parse_partition_by(), 2503 where=self._parse_where(), 2504 ) 2505 2506 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2507 hints: t.List[exp.Expression] = [] 2508 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2509 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2510 hints.append( 2511 self.expression( 2512 exp.WithTableHint, 2513 expressions=self._parse_csv( 2514 lambda: self._parse_function() or self._parse_var(any_token=True) 2515 ), 2516 ) 2517 ) 2518 self._match_r_paren() 2519 else: 2520 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2521 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2522 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2523 2524 self._match_texts({"INDEX", "KEY"}) 2525 if self._match(TokenType.FOR): 2526 hint.set("target", self._advance_any() and self._prev.text.upper()) 2527 2528 hint.set("expressions", self._parse_wrapped_id_vars()) 2529 hints.append(hint) 2530 2531 return hints or None 2532 2533 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2534 return ( 2535 (not schema and self._parse_function(optional_parens=False)) 2536 or self._parse_id_var(any_token=False) 2537 or self._parse_string_as_identifier() 2538 or self._parse_placeholder() 2539 ) 2540 2541 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2542 catalog = None 2543 db = None 2544 table = self._parse_table_part(schema=schema) 2545 2546 while self._match(TokenType.DOT): 2547 if catalog: 2548 # This allows nesting the table in arbitrarily many dot expressions if needed 2549 table = self.expression( 2550 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2551 ) 2552 else: 2553 catalog = db 2554 db = table 2555 table = self._parse_table_part(schema=schema) 2556 2557 if not table: 2558 self.raise_error(f"Expected table name but got {self._curr}") 2559 2560 return self.expression( 2561 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2562 ) 2563 2564 def _parse_table( 2565 self, 2566 schema: bool = False, 2567 joins: bool = False, 2568 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2569 parse_bracket: bool = False, 2570 ) -> t.Optional[exp.Expression]: 2571 lateral = self._parse_lateral() 2572 if lateral: 2573 return lateral 2574 2575 unnest = self._parse_unnest() 2576 if unnest: 2577 return unnest 2578 2579 values = self._parse_derived_table_values() 2580 if values: 2581 return values 2582 2583 subquery = self._parse_select(table=True) 2584 if subquery: 2585 if not subquery.args.get("pivots"): 2586 subquery.set("pivots", self._parse_pivots()) 2587 return subquery 2588 2589 bracket = parse_bracket and self._parse_bracket(None) 2590 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2591 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2592 2593 if schema: 2594 return self._parse_schema(this=this) 2595 2596 version = self._parse_version() 2597 2598 if version: 2599 this.set("version", version) 2600 2601 if self.ALIAS_POST_TABLESAMPLE: 2602 table_sample = self._parse_table_sample() 2603 2604 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2605 if alias: 2606 this.set("alias", alias) 2607 2608 this.set("hints", self._parse_table_hints()) 2609 2610 if not this.args.get("pivots"): 2611 this.set("pivots", self._parse_pivots()) 2612 2613 if not self.ALIAS_POST_TABLESAMPLE: 2614 table_sample = self._parse_table_sample() 2615 2616 if table_sample: 2617 table_sample.set("this", this) 2618 this = table_sample 2619 2620 if joins: 2621 for join in iter(self._parse_join, None): 2622 this.append("joins", join) 2623 2624 return this 2625 2626 def _parse_version(self) -> t.Optional[exp.Version]: 2627 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2628 this = "TIMESTAMP" 2629 elif self._match(TokenType.VERSION_SNAPSHOT): 2630 this = "VERSION" 2631 else: 2632 return None 2633 2634 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2635 kind = self._prev.text.upper() 2636 start = self._parse_bitwise() 2637 self._match_texts(("TO", "AND")) 2638 end = self._parse_bitwise() 2639 expression: t.Optional[exp.Expression] = self.expression( 2640 exp.Tuple, expressions=[start, end] 2641 ) 2642 elif self._match_text_seq("CONTAINED", "IN"): 2643 kind = "CONTAINED IN" 2644 expression = self.expression( 2645 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2646 ) 2647 elif self._match(TokenType.ALL): 2648 kind = "ALL" 2649 expression = None 2650 else: 2651 self._match_text_seq("AS", "OF") 2652 kind = "AS OF" 2653 expression = self._parse_type() 2654 2655 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2656 2657 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2658 if not self._match(TokenType.UNNEST): 2659 return None 2660 2661 expressions = self._parse_wrapped_csv(self._parse_type) 2662 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2663 2664 alias = self._parse_table_alias() if with_alias else None 2665 2666 if alias: 2667 if self.UNNEST_COLUMN_ONLY: 2668 if alias.args.get("columns"): 2669 self.raise_error("Unexpected extra column alias in unnest.") 2670 2671 alias.set("columns", [alias.this]) 2672 alias.set("this", None) 2673 2674 columns = alias.args.get("columns") or [] 2675 if offset and len(expressions) < len(columns): 2676 offset = columns.pop() 2677 2678 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2679 self._match(TokenType.ALIAS) 2680 offset = self._parse_id_var() or exp.to_identifier("offset") 2681 2682 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2683 2684 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2685 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2686 if not is_derived and not self._match(TokenType.VALUES): 2687 return None 2688 2689 expressions = self._parse_csv(self._parse_value) 2690 alias = self._parse_table_alias() 2691 2692 if is_derived: 2693 self._match_r_paren() 2694 2695 return self.expression( 2696 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2697 ) 2698 2699 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2700 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2701 as_modifier and self._match_text_seq("USING", "SAMPLE") 2702 ): 2703 return None 2704 2705 bucket_numerator = None 2706 bucket_denominator = None 2707 bucket_field = None 2708 percent = None 2709 rows = None 2710 size = None 2711 seed = None 2712 2713 kind = ( 2714 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2715 ) 2716 method = self._parse_var(tokens=(TokenType.ROW,)) 2717 2718 self._match(TokenType.L_PAREN) 2719 2720 if self.TABLESAMPLE_CSV: 2721 num = None 2722 expressions = self._parse_csv(self._parse_primary) 2723 else: 2724 expressions = None 2725 num = self._parse_primary() 2726 2727 if self._match_text_seq("BUCKET"): 2728 bucket_numerator = self._parse_number() 2729 self._match_text_seq("OUT", "OF") 2730 bucket_denominator = bucket_denominator = self._parse_number() 2731 self._match(TokenType.ON) 2732 bucket_field = self._parse_field() 2733 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2734 percent = num 2735 elif self._match(TokenType.ROWS): 2736 rows = num 2737 elif num: 2738 size = num 2739 2740 self._match(TokenType.R_PAREN) 2741 2742 if self._match(TokenType.L_PAREN): 2743 method = self._parse_var() 2744 seed = self._match(TokenType.COMMA) and self._parse_number() 2745 self._match_r_paren() 2746 elif self._match_texts(("SEED", "REPEATABLE")): 2747 seed = self._parse_wrapped(self._parse_number) 2748 2749 return self.expression( 2750 exp.TableSample, 2751 expressions=expressions, 2752 method=method, 2753 bucket_numerator=bucket_numerator, 2754 bucket_denominator=bucket_denominator, 2755 bucket_field=bucket_field, 2756 percent=percent, 2757 rows=rows, 2758 size=size, 2759 seed=seed, 2760 kind=kind, 2761 ) 2762 2763 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2764 return list(iter(self._parse_pivot, None)) or None 2765 2766 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2767 return list(iter(self._parse_join, None)) or None 2768 2769 # https://duckdb.org/docs/sql/statements/pivot 2770 def _parse_simplified_pivot(self) -> exp.Pivot: 2771 def _parse_on() -> t.Optional[exp.Expression]: 2772 this = self._parse_bitwise() 2773 return self._parse_in(this) if self._match(TokenType.IN) else this 2774 2775 this = self._parse_table() 2776 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2777 using = self._match(TokenType.USING) and self._parse_csv( 2778 lambda: self._parse_alias(self._parse_function()) 2779 ) 2780 group = self._parse_group() 2781 return self.expression( 2782 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2783 ) 2784 2785 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2786 index = self._index 2787 include_nulls = None 2788 2789 if self._match(TokenType.PIVOT): 2790 unpivot = False 2791 elif self._match(TokenType.UNPIVOT): 2792 unpivot = True 2793 2794 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2795 if self._match_text_seq("INCLUDE", "NULLS"): 2796 include_nulls = True 2797 elif self._match_text_seq("EXCLUDE", "NULLS"): 2798 include_nulls = False 2799 else: 2800 return None 2801 2802 expressions = [] 2803 field = None 2804 2805 if not self._match(TokenType.L_PAREN): 2806 self._retreat(index) 2807 return None 2808 2809 if unpivot: 2810 expressions = self._parse_csv(self._parse_column) 2811 else: 2812 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2813 2814 if not expressions: 2815 self.raise_error("Failed to parse PIVOT's aggregation list") 2816 2817 if not self._match(TokenType.FOR): 2818 self.raise_error("Expecting FOR") 2819 2820 value = self._parse_column() 2821 2822 if not self._match(TokenType.IN): 2823 self.raise_error("Expecting IN") 2824 2825 field = self._parse_in(value, alias=True) 2826 2827 self._match_r_paren() 2828 2829 pivot = self.expression( 2830 exp.Pivot, 2831 expressions=expressions, 2832 field=field, 2833 unpivot=unpivot, 2834 include_nulls=include_nulls, 2835 ) 2836 2837 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2838 pivot.set("alias", self._parse_table_alias()) 2839 2840 if not unpivot: 2841 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2842 2843 columns: t.List[exp.Expression] = [] 2844 for fld in pivot.args["field"].expressions: 2845 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2846 for name in names: 2847 if self.PREFIXED_PIVOT_COLUMNS: 2848 name = f"{name}_{field_name}" if name else field_name 2849 else: 2850 name = f"{field_name}_{name}" if name else field_name 2851 2852 columns.append(exp.to_identifier(name)) 2853 2854 pivot.set("columns", columns) 2855 2856 return pivot 2857 2858 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2859 return [agg.alias for agg in aggregations] 2860 2861 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2862 if not skip_where_token and not self._match(TokenType.WHERE): 2863 return None 2864 2865 return self.expression( 2866 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2867 ) 2868 2869 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2870 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2871 return None 2872 2873 elements = defaultdict(list) 2874 2875 if self._match(TokenType.ALL): 2876 return self.expression(exp.Group, all=True) 2877 2878 while True: 2879 expressions = self._parse_csv(self._parse_conjunction) 2880 if expressions: 2881 elements["expressions"].extend(expressions) 2882 2883 grouping_sets = self._parse_grouping_sets() 2884 if grouping_sets: 2885 elements["grouping_sets"].extend(grouping_sets) 2886 2887 rollup = None 2888 cube = None 2889 totals = None 2890 2891 with_ = self._match(TokenType.WITH) 2892 if self._match(TokenType.ROLLUP): 2893 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2894 elements["rollup"].extend(ensure_list(rollup)) 2895 2896 if self._match(TokenType.CUBE): 2897 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2898 elements["cube"].extend(ensure_list(cube)) 2899 2900 if self._match_text_seq("TOTALS"): 2901 totals = True 2902 elements["totals"] = True # type: ignore 2903 2904 if not (grouping_sets or rollup or cube or totals): 2905 break 2906 2907 return self.expression(exp.Group, **elements) # type: ignore 2908 2909 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2910 if not self._match(TokenType.GROUPING_SETS): 2911 return None 2912 2913 return self._parse_wrapped_csv(self._parse_grouping_set) 2914 2915 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2916 if self._match(TokenType.L_PAREN): 2917 grouping_set = self._parse_csv(self._parse_column) 2918 self._match_r_paren() 2919 return self.expression(exp.Tuple, expressions=grouping_set) 2920 2921 return self._parse_column() 2922 2923 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2924 if not skip_having_token and not self._match(TokenType.HAVING): 2925 return None 2926 return self.expression(exp.Having, this=self._parse_conjunction()) 2927 2928 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2929 if not self._match(TokenType.QUALIFY): 2930 return None 2931 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2932 2933 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2934 if skip_start_token: 2935 start = None 2936 elif self._match(TokenType.START_WITH): 2937 start = self._parse_conjunction() 2938 else: 2939 return None 2940 2941 self._match(TokenType.CONNECT_BY) 2942 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2943 exp.Prior, this=self._parse_bitwise() 2944 ) 2945 connect = self._parse_conjunction() 2946 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2947 2948 if not start and self._match(TokenType.START_WITH): 2949 start = self._parse_conjunction() 2950 2951 return self.expression(exp.Connect, start=start, connect=connect) 2952 2953 def _parse_order( 2954 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2955 ) -> t.Optional[exp.Expression]: 2956 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2957 return this 2958 2959 return self.expression( 2960 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2961 ) 2962 2963 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2964 if not self._match(token): 2965 return None 2966 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2967 2968 def _parse_ordered(self) -> exp.Ordered: 2969 this = self._parse_conjunction() 2970 2971 asc = self._match(TokenType.ASC) 2972 desc = self._match(TokenType.DESC) or (asc and False) 2973 2974 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2975 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2976 2977 nulls_first = is_nulls_first or False 2978 explicitly_null_ordered = is_nulls_first or is_nulls_last 2979 2980 if ( 2981 not explicitly_null_ordered 2982 and ( 2983 (not desc and self.NULL_ORDERING == "nulls_are_small") 2984 or (desc and self.NULL_ORDERING != "nulls_are_small") 2985 ) 2986 and self.NULL_ORDERING != "nulls_are_last" 2987 ): 2988 nulls_first = True 2989 2990 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2991 2992 def _parse_limit( 2993 self, this: t.Optional[exp.Expression] = None, top: bool = False 2994 ) -> t.Optional[exp.Expression]: 2995 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2996 comments = self._prev_comments 2997 if top: 2998 limit_paren = self._match(TokenType.L_PAREN) 2999 expression = self._parse_number() 3000 3001 if limit_paren: 3002 self._match_r_paren() 3003 else: 3004 expression = self._parse_term() 3005 3006 if self._match(TokenType.COMMA): 3007 offset = expression 3008 expression = self._parse_term() 3009 else: 3010 offset = None 3011 3012 limit_exp = self.expression( 3013 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3014 ) 3015 3016 return limit_exp 3017 3018 if self._match(TokenType.FETCH): 3019 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3020 direction = self._prev.text if direction else "FIRST" 3021 3022 count = self._parse_field(tokens=self.FETCH_TOKENS) 3023 percent = self._match(TokenType.PERCENT) 3024 3025 self._match_set((TokenType.ROW, TokenType.ROWS)) 3026 3027 only = self._match_text_seq("ONLY") 3028 with_ties = self._match_text_seq("WITH", "TIES") 3029 3030 if only and with_ties: 3031 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3032 3033 return self.expression( 3034 exp.Fetch, 3035 direction=direction, 3036 count=count, 3037 percent=percent, 3038 with_ties=with_ties, 3039 ) 3040 3041 return this 3042 3043 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3044 if not self._match(TokenType.OFFSET): 3045 return this 3046 3047 count = self._parse_term() 3048 self._match_set((TokenType.ROW, TokenType.ROWS)) 3049 return self.expression(exp.Offset, this=this, expression=count) 3050 3051 def _parse_locks(self) -> t.List[exp.Lock]: 3052 locks = [] 3053 while True: 3054 if self._match_text_seq("FOR", "UPDATE"): 3055 update = True 3056 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3057 "LOCK", "IN", "SHARE", "MODE" 3058 ): 3059 update = False 3060 else: 3061 break 3062 3063 expressions = None 3064 if self._match_text_seq("OF"): 3065 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3066 3067 wait: t.Optional[bool | exp.Expression] = None 3068 if self._match_text_seq("NOWAIT"): 3069 wait = True 3070 elif self._match_text_seq("WAIT"): 3071 wait = self._parse_primary() 3072 elif self._match_text_seq("SKIP", "LOCKED"): 3073 wait = False 3074 3075 locks.append( 3076 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3077 ) 3078 3079 return locks 3080 3081 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3082 if not self._match_set(self.SET_OPERATIONS): 3083 return this 3084 3085 token_type = self._prev.token_type 3086 3087 if token_type == TokenType.UNION: 3088 expression = exp.Union 3089 elif token_type == TokenType.EXCEPT: 3090 expression = exp.Except 3091 else: 3092 expression = exp.Intersect 3093 3094 return self.expression( 3095 expression, 3096 this=this, 3097 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3098 by_name=self._match_text_seq("BY", "NAME"), 3099 expression=self._parse_set_operations(self._parse_select(nested=True)), 3100 ) 3101 3102 def _parse_expression(self) -> t.Optional[exp.Expression]: 3103 return self._parse_alias(self._parse_conjunction()) 3104 3105 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3106 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3107 3108 def _parse_equality(self) -> t.Optional[exp.Expression]: 3109 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3110 3111 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3112 return self._parse_tokens(self._parse_range, self.COMPARISON) 3113 3114 def _parse_range(self) -> t.Optional[exp.Expression]: 3115 this = self._parse_bitwise() 3116 negate = self._match(TokenType.NOT) 3117 3118 if self._match_set(self.RANGE_PARSERS): 3119 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3120 if not expression: 3121 return this 3122 3123 this = expression 3124 elif self._match(TokenType.ISNULL): 3125 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3126 3127 # Postgres supports ISNULL and NOTNULL for conditions. 3128 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3129 if self._match(TokenType.NOTNULL): 3130 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3131 this = self.expression(exp.Not, this=this) 3132 3133 if negate: 3134 this = self.expression(exp.Not, this=this) 3135 3136 if self._match(TokenType.IS): 3137 this = self._parse_is(this) 3138 3139 return this 3140 3141 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3142 index = self._index - 1 3143 negate = self._match(TokenType.NOT) 3144 3145 if self._match_text_seq("DISTINCT", "FROM"): 3146 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3147 return self.expression(klass, this=this, expression=self._parse_expression()) 3148 3149 expression = self._parse_null() or self._parse_boolean() 3150 if not expression: 3151 self._retreat(index) 3152 return None 3153 3154 this = self.expression(exp.Is, this=this, expression=expression) 3155 return self.expression(exp.Not, this=this) if negate else this 3156 3157 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3158 unnest = self._parse_unnest(with_alias=False) 3159 if unnest: 3160 this = self.expression(exp.In, this=this, unnest=unnest) 3161 elif self._match(TokenType.L_PAREN): 3162 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3163 3164 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3165 this = self.expression(exp.In, this=this, query=expressions[0]) 3166 else: 3167 this = self.expression(exp.In, this=this, expressions=expressions) 3168 3169 self._match_r_paren(this) 3170 else: 3171 this = self.expression(exp.In, this=this, field=self._parse_field()) 3172 3173 return this 3174 3175 def _parse_between(self, this: exp.Expression) -> exp.Between: 3176 low = self._parse_bitwise() 3177 self._match(TokenType.AND) 3178 high = self._parse_bitwise() 3179 return self.expression(exp.Between, this=this, low=low, high=high) 3180 3181 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3182 if not self._match(TokenType.ESCAPE): 3183 return this 3184 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3185 3186 def _parse_interval(self) -> t.Optional[exp.Interval]: 3187 index = self._index 3188 3189 if not self._match(TokenType.INTERVAL): 3190 return None 3191 3192 if self._match(TokenType.STRING, advance=False): 3193 this = self._parse_primary() 3194 else: 3195 this = self._parse_term() 3196 3197 if not this: 3198 self._retreat(index) 3199 return None 3200 3201 unit = self._parse_function() or self._parse_var(any_token=True) 3202 3203 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3204 # each INTERVAL expression into this canonical form so it's easy to transpile 3205 if this and this.is_number: 3206 this = exp.Literal.string(this.name) 3207 elif this and this.is_string: 3208 parts = this.name.split() 3209 3210 if len(parts) == 2: 3211 if unit: 3212 # This is not actually a unit, it's something else (e.g. a "window side") 3213 unit = None 3214 self._retreat(self._index - 1) 3215 3216 this = exp.Literal.string(parts[0]) 3217 unit = self.expression(exp.Var, this=parts[1]) 3218 3219 return self.expression(exp.Interval, this=this, unit=unit) 3220 3221 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3222 this = self._parse_term() 3223 3224 while True: 3225 if self._match_set(self.BITWISE): 3226 this = self.expression( 3227 self.BITWISE[self._prev.token_type], 3228 this=this, 3229 expression=self._parse_term(), 3230 ) 3231 elif self._match(TokenType.DQMARK): 3232 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3233 elif self._match_pair(TokenType.LT, TokenType.LT): 3234 this = self.expression( 3235 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3236 ) 3237 elif self._match_pair(TokenType.GT, TokenType.GT): 3238 this = self.expression( 3239 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3240 ) 3241 else: 3242 break 3243 3244 return this 3245 3246 def _parse_term(self) -> t.Optional[exp.Expression]: 3247 return self._parse_tokens(self._parse_factor, self.TERM) 3248 3249 def _parse_factor(self) -> t.Optional[exp.Expression]: 3250 return self._parse_tokens(self._parse_unary, self.FACTOR) 3251 3252 def _parse_unary(self) -> t.Optional[exp.Expression]: 3253 if self._match_set(self.UNARY_PARSERS): 3254 return self.UNARY_PARSERS[self._prev.token_type](self) 3255 return self._parse_at_time_zone(self._parse_type()) 3256 3257 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3258 interval = parse_interval and self._parse_interval() 3259 if interval: 3260 return interval 3261 3262 index = self._index 3263 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3264 this = self._parse_column() 3265 3266 if data_type: 3267 if isinstance(this, exp.Literal): 3268 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3269 if parser: 3270 return parser(self, this, data_type) 3271 return self.expression(exp.Cast, this=this, to=data_type) 3272 if not data_type.expressions: 3273 self._retreat(index) 3274 return self._parse_column() 3275 return self._parse_column_ops(data_type) 3276 3277 return this and self._parse_column_ops(this) 3278 3279 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3280 this = self._parse_type() 3281 if not this: 3282 return None 3283 3284 return self.expression( 3285 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3286 ) 3287 3288 def _parse_types( 3289 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3290 ) -> t.Optional[exp.Expression]: 3291 index = self._index 3292 3293 prefix = self._match_text_seq("SYSUDTLIB", ".") 3294 3295 if not self._match_set(self.TYPE_TOKENS): 3296 identifier = allow_identifiers and self._parse_id_var( 3297 any_token=False, tokens=(TokenType.VAR,) 3298 ) 3299 3300 if identifier: 3301 tokens = self._tokenizer.tokenize(identifier.name) 3302 3303 if len(tokens) != 1: 3304 self.raise_error("Unexpected identifier", self._prev) 3305 3306 if tokens[0].token_type in self.TYPE_TOKENS: 3307 self._prev = tokens[0] 3308 elif self.SUPPORTS_USER_DEFINED_TYPES: 3309 type_name = identifier.name 3310 3311 while self._match(TokenType.DOT): 3312 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3313 3314 return exp.DataType.build(type_name, udt=True) 3315 else: 3316 return None 3317 else: 3318 return None 3319 3320 type_token = self._prev.token_type 3321 3322 if type_token == TokenType.PSEUDO_TYPE: 3323 return self.expression(exp.PseudoType, this=self._prev.text) 3324 3325 if type_token == TokenType.OBJECT_IDENTIFIER: 3326 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3327 3328 nested = type_token in self.NESTED_TYPE_TOKENS 3329 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3330 expressions = None 3331 maybe_func = False 3332 3333 if self._match(TokenType.L_PAREN): 3334 if is_struct: 3335 expressions = self._parse_csv(self._parse_struct_types) 3336 elif nested: 3337 expressions = self._parse_csv( 3338 lambda: self._parse_types( 3339 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3340 ) 3341 ) 3342 elif type_token in self.ENUM_TYPE_TOKENS: 3343 expressions = self._parse_csv(self._parse_equality) 3344 else: 3345 expressions = self._parse_csv(self._parse_type_size) 3346 3347 if not expressions or not self._match(TokenType.R_PAREN): 3348 self._retreat(index) 3349 return None 3350 3351 maybe_func = True 3352 3353 this: t.Optional[exp.Expression] = None 3354 values: t.Optional[t.List[exp.Expression]] = None 3355 3356 if nested and self._match(TokenType.LT): 3357 if is_struct: 3358 expressions = self._parse_csv(self._parse_struct_types) 3359 else: 3360 expressions = self._parse_csv( 3361 lambda: self._parse_types( 3362 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3363 ) 3364 ) 3365 3366 if not self._match(TokenType.GT): 3367 self.raise_error("Expecting >") 3368 3369 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3370 values = self._parse_csv(self._parse_conjunction) 3371 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3372 3373 if type_token in self.TIMESTAMPS: 3374 if self._match_text_seq("WITH", "TIME", "ZONE"): 3375 maybe_func = False 3376 tz_type = ( 3377 exp.DataType.Type.TIMETZ 3378 if type_token in self.TIMES 3379 else exp.DataType.Type.TIMESTAMPTZ 3380 ) 3381 this = exp.DataType(this=tz_type, expressions=expressions) 3382 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3383 maybe_func = False 3384 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3385 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3386 maybe_func = False 3387 elif type_token == TokenType.INTERVAL: 3388 unit = self._parse_var() 3389 3390 if self._match_text_seq("TO"): 3391 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3392 else: 3393 span = None 3394 3395 if span or not unit: 3396 this = self.expression( 3397 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3398 ) 3399 else: 3400 this = self.expression(exp.Interval, unit=unit) 3401 3402 if maybe_func and check_func: 3403 index2 = self._index 3404 peek = self._parse_string() 3405 3406 if not peek: 3407 self._retreat(index) 3408 return None 3409 3410 self._retreat(index2) 3411 3412 if not this: 3413 if self._match_text_seq("UNSIGNED"): 3414 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3415 if not unsigned_type_token: 3416 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3417 3418 type_token = unsigned_type_token or type_token 3419 3420 this = exp.DataType( 3421 this=exp.DataType.Type[type_token.value], 3422 expressions=expressions, 3423 nested=nested, 3424 values=values, 3425 prefix=prefix, 3426 ) 3427 3428 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3429 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3430 3431 return this 3432 3433 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3434 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3435 self._match(TokenType.COLON) 3436 return self._parse_column_def(this) 3437 3438 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3439 if not self._match_text_seq("AT", "TIME", "ZONE"): 3440 return this 3441 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3442 3443 def _parse_column(self) -> t.Optional[exp.Expression]: 3444 this = self._parse_field() 3445 if isinstance(this, exp.Identifier): 3446 this = self.expression(exp.Column, this=this) 3447 elif not this: 3448 return self._parse_bracket(this) 3449 return self._parse_column_ops(this) 3450 3451 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3452 this = self._parse_bracket(this) 3453 3454 while self._match_set(self.COLUMN_OPERATORS): 3455 op_token = self._prev.token_type 3456 op = self.COLUMN_OPERATORS.get(op_token) 3457 3458 if op_token == TokenType.DCOLON: 3459 field = self._parse_types() 3460 if not field: 3461 self.raise_error("Expected type") 3462 elif op and self._curr: 3463 self._advance() 3464 value = self._prev.text 3465 field = ( 3466 exp.Literal.number(value) 3467 if self._prev.token_type == TokenType.NUMBER 3468 else exp.Literal.string(value) 3469 ) 3470 else: 3471 field = self._parse_field(anonymous_func=True, any_token=True) 3472 3473 if isinstance(field, exp.Func): 3474 # bigquery allows function calls like x.y.count(...) 3475 # SAFE.SUBSTR(...) 3476 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3477 this = self._replace_columns_with_dots(this) 3478 3479 if op: 3480 this = op(self, this, field) 3481 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3482 this = self.expression( 3483 exp.Column, 3484 this=field, 3485 table=this.this, 3486 db=this.args.get("table"), 3487 catalog=this.args.get("db"), 3488 ) 3489 else: 3490 this = self.expression(exp.Dot, this=this, expression=field) 3491 this = self._parse_bracket(this) 3492 return this 3493 3494 def _parse_primary(self) -> t.Optional[exp.Expression]: 3495 if self._match_set(self.PRIMARY_PARSERS): 3496 token_type = self._prev.token_type 3497 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3498 3499 if token_type == TokenType.STRING: 3500 expressions = [primary] 3501 while self._match(TokenType.STRING): 3502 expressions.append(exp.Literal.string(self._prev.text)) 3503 3504 if len(expressions) > 1: 3505 return self.expression(exp.Concat, expressions=expressions) 3506 3507 return primary 3508 3509 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3510 return exp.Literal.number(f"0.{self._prev.text}") 3511 3512 if self._match(TokenType.L_PAREN): 3513 comments = self._prev_comments 3514 query = self._parse_select() 3515 3516 if query: 3517 expressions = [query] 3518 else: 3519 expressions = self._parse_expressions() 3520 3521 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3522 3523 if isinstance(this, exp.Subqueryable): 3524 this = self._parse_set_operations( 3525 self._parse_subquery(this=this, parse_alias=False) 3526 ) 3527 elif len(expressions) > 1: 3528 this = self.expression(exp.Tuple, expressions=expressions) 3529 else: 3530 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3531 3532 if this: 3533 this.add_comments(comments) 3534 3535 self._match_r_paren(expression=this) 3536 return this 3537 3538 return None 3539 3540 def _parse_field( 3541 self, 3542 any_token: bool = False, 3543 tokens: t.Optional[t.Collection[TokenType]] = None, 3544 anonymous_func: bool = False, 3545 ) -> t.Optional[exp.Expression]: 3546 return ( 3547 self._parse_primary() 3548 or self._parse_function(anonymous=anonymous_func) 3549 or self._parse_id_var(any_token=any_token, tokens=tokens) 3550 ) 3551 3552 def _parse_function( 3553 self, 3554 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3555 anonymous: bool = False, 3556 optional_parens: bool = True, 3557 ) -> t.Optional[exp.Expression]: 3558 if not self._curr: 3559 return None 3560 3561 token_type = self._curr.token_type 3562 this = self._curr.text 3563 upper = this.upper() 3564 3565 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3566 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3567 self._advance() 3568 return parser(self) 3569 3570 if not self._next or self._next.token_type != TokenType.L_PAREN: 3571 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3572 self._advance() 3573 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3574 3575 return None 3576 3577 if token_type not in self.FUNC_TOKENS: 3578 return None 3579 3580 self._advance(2) 3581 3582 parser = self.FUNCTION_PARSERS.get(upper) 3583 if parser and not anonymous: 3584 this = parser(self) 3585 else: 3586 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3587 3588 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3589 this = self.expression(subquery_predicate, this=self._parse_select()) 3590 self._match_r_paren() 3591 return this 3592 3593 if functions is None: 3594 functions = self.FUNCTIONS 3595 3596 function = functions.get(upper) 3597 3598 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3599 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3600 3601 if function and not anonymous: 3602 func = self.validate_expression(function(args), args) 3603 if not self.NORMALIZE_FUNCTIONS: 3604 func.meta["name"] = this 3605 this = func 3606 else: 3607 this = self.expression(exp.Anonymous, this=this, expressions=args) 3608 3609 self._match_r_paren(this) 3610 return self._parse_window(this) 3611 3612 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3613 return self._parse_column_def(self._parse_id_var()) 3614 3615 def _parse_user_defined_function( 3616 self, kind: t.Optional[TokenType] = None 3617 ) -> t.Optional[exp.Expression]: 3618 this = self._parse_id_var() 3619 3620 while self._match(TokenType.DOT): 3621 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3622 3623 if not self._match(TokenType.L_PAREN): 3624 return this 3625 3626 expressions = self._parse_csv(self._parse_function_parameter) 3627 self._match_r_paren() 3628 return self.expression( 3629 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3630 ) 3631 3632 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3633 literal = self._parse_primary() 3634 if literal: 3635 return self.expression(exp.Introducer, this=token.text, expression=literal) 3636 3637 return self.expression(exp.Identifier, this=token.text) 3638 3639 def _parse_session_parameter(self) -> exp.SessionParameter: 3640 kind = None 3641 this = self._parse_id_var() or self._parse_primary() 3642 3643 if this and self._match(TokenType.DOT): 3644 kind = this.name 3645 this = self._parse_var() or self._parse_primary() 3646 3647 return self.expression(exp.SessionParameter, this=this, kind=kind) 3648 3649 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3650 index = self._index 3651 3652 if self._match(TokenType.L_PAREN): 3653 expressions = t.cast( 3654 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3655 ) 3656 3657 if not self._match(TokenType.R_PAREN): 3658 self._retreat(index) 3659 else: 3660 expressions = [self._parse_id_var()] 3661 3662 if self._match_set(self.LAMBDAS): 3663 return self.LAMBDAS[self._prev.token_type](self, expressions) 3664 3665 self._retreat(index) 3666 3667 this: t.Optional[exp.Expression] 3668 3669 if self._match(TokenType.DISTINCT): 3670 this = self.expression( 3671 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3672 ) 3673 else: 3674 this = self._parse_select_or_expression(alias=alias) 3675 3676 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3677 3678 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3679 index = self._index 3680 3681 if not self.errors: 3682 try: 3683 if self._parse_select(nested=True): 3684 return this 3685 except ParseError: 3686 pass 3687 finally: 3688 self.errors.clear() 3689 self._retreat(index) 3690 3691 if not self._match(TokenType.L_PAREN): 3692 return this 3693 3694 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3695 3696 self._match_r_paren() 3697 return self.expression(exp.Schema, this=this, expressions=args) 3698 3699 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3700 return self._parse_column_def(self._parse_field(any_token=True)) 3701 3702 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3703 # column defs are not really columns, they're identifiers 3704 if isinstance(this, exp.Column): 3705 this = this.this 3706 3707 kind = self._parse_types(schema=True) 3708 3709 if self._match_text_seq("FOR", "ORDINALITY"): 3710 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3711 3712 constraints: t.List[exp.Expression] = [] 3713 3714 if not kind and self._match(TokenType.ALIAS): 3715 constraints.append( 3716 self.expression( 3717 exp.ComputedColumnConstraint, 3718 this=self._parse_conjunction(), 3719 persisted=self._match_text_seq("PERSISTED"), 3720 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3721 ) 3722 ) 3723 3724 while True: 3725 constraint = self._parse_column_constraint() 3726 if not constraint: 3727 break 3728 constraints.append(constraint) 3729 3730 if not kind and not constraints: 3731 return this 3732 3733 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3734 3735 def _parse_auto_increment( 3736 self, 3737 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3738 start = None 3739 increment = None 3740 3741 if self._match(TokenType.L_PAREN, advance=False): 3742 args = self._parse_wrapped_csv(self._parse_bitwise) 3743 start = seq_get(args, 0) 3744 increment = seq_get(args, 1) 3745 elif self._match_text_seq("START"): 3746 start = self._parse_bitwise() 3747 self._match_text_seq("INCREMENT") 3748 increment = self._parse_bitwise() 3749 3750 if start and increment: 3751 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3752 3753 return exp.AutoIncrementColumnConstraint() 3754 3755 def _parse_compress(self) -> exp.CompressColumnConstraint: 3756 if self._match(TokenType.L_PAREN, advance=False): 3757 return self.expression( 3758 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3759 ) 3760 3761 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3762 3763 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3764 if self._match_text_seq("BY", "DEFAULT"): 3765 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3766 this = self.expression( 3767 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3768 ) 3769 else: 3770 self._match_text_seq("ALWAYS") 3771 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3772 3773 self._match(TokenType.ALIAS) 3774 identity = self._match_text_seq("IDENTITY") 3775 3776 if self._match(TokenType.L_PAREN): 3777 if self._match(TokenType.START_WITH): 3778 this.set("start", self._parse_bitwise()) 3779 if self._match_text_seq("INCREMENT", "BY"): 3780 this.set("increment", self._parse_bitwise()) 3781 if self._match_text_seq("MINVALUE"): 3782 this.set("minvalue", self._parse_bitwise()) 3783 if self._match_text_seq("MAXVALUE"): 3784 this.set("maxvalue", self._parse_bitwise()) 3785 3786 if self._match_text_seq("CYCLE"): 3787 this.set("cycle", True) 3788 elif self._match_text_seq("NO", "CYCLE"): 3789 this.set("cycle", False) 3790 3791 if not identity: 3792 this.set("expression", self._parse_bitwise()) 3793 3794 self._match_r_paren() 3795 3796 return this 3797 3798 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3799 self._match_text_seq("LENGTH") 3800 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3801 3802 def _parse_not_constraint( 3803 self, 3804 ) -> t.Optional[exp.Expression]: 3805 if self._match_text_seq("NULL"): 3806 return self.expression(exp.NotNullColumnConstraint) 3807 if self._match_text_seq("CASESPECIFIC"): 3808 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3809 if self._match_text_seq("FOR", "REPLICATION"): 3810 return self.expression(exp.NotForReplicationColumnConstraint) 3811 return None 3812 3813 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3814 if self._match(TokenType.CONSTRAINT): 3815 this = self._parse_id_var() 3816 else: 3817 this = None 3818 3819 if self._match_texts(self.CONSTRAINT_PARSERS): 3820 return self.expression( 3821 exp.ColumnConstraint, 3822 this=this, 3823 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3824 ) 3825 3826 return this 3827 3828 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3829 if not self._match(TokenType.CONSTRAINT): 3830 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3831 3832 this = self._parse_id_var() 3833 expressions = [] 3834 3835 while True: 3836 constraint = self._parse_unnamed_constraint() or self._parse_function() 3837 if not constraint: 3838 break 3839 expressions.append(constraint) 3840 3841 return self.expression(exp.Constraint, this=this, expressions=expressions) 3842 3843 def _parse_unnamed_constraint( 3844 self, constraints: t.Optional[t.Collection[str]] = None 3845 ) -> t.Optional[exp.Expression]: 3846 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3847 return None 3848 3849 constraint = self._prev.text.upper() 3850 if constraint not in self.CONSTRAINT_PARSERS: 3851 self.raise_error(f"No parser found for schema constraint {constraint}.") 3852 3853 return self.CONSTRAINT_PARSERS[constraint](self) 3854 3855 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3856 self._match_text_seq("KEY") 3857 return self.expression( 3858 exp.UniqueColumnConstraint, 3859 this=self._parse_schema(self._parse_id_var(any_token=False)), 3860 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3861 ) 3862 3863 def _parse_key_constraint_options(self) -> t.List[str]: 3864 options = [] 3865 while True: 3866 if not self._curr: 3867 break 3868 3869 if self._match(TokenType.ON): 3870 action = None 3871 on = self._advance_any() and self._prev.text 3872 3873 if self._match_text_seq("NO", "ACTION"): 3874 action = "NO ACTION" 3875 elif self._match_text_seq("CASCADE"): 3876 action = "CASCADE" 3877 elif self._match_text_seq("RESTRICT"): 3878 action = "RESTRICT" 3879 elif self._match_pair(TokenType.SET, TokenType.NULL): 3880 action = "SET NULL" 3881 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3882 action = "SET DEFAULT" 3883 else: 3884 self.raise_error("Invalid key constraint") 3885 3886 options.append(f"ON {on} {action}") 3887 elif self._match_text_seq("NOT", "ENFORCED"): 3888 options.append("NOT ENFORCED") 3889 elif self._match_text_seq("DEFERRABLE"): 3890 options.append("DEFERRABLE") 3891 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3892 options.append("INITIALLY DEFERRED") 3893 elif self._match_text_seq("NORELY"): 3894 options.append("NORELY") 3895 elif self._match_text_seq("MATCH", "FULL"): 3896 options.append("MATCH FULL") 3897 else: 3898 break 3899 3900 return options 3901 3902 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3903 if match and not self._match(TokenType.REFERENCES): 3904 return None 3905 3906 expressions = None 3907 this = self._parse_table(schema=True) 3908 options = self._parse_key_constraint_options() 3909 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3910 3911 def _parse_foreign_key(self) -> exp.ForeignKey: 3912 expressions = self._parse_wrapped_id_vars() 3913 reference = self._parse_references() 3914 options = {} 3915 3916 while self._match(TokenType.ON): 3917 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3918 self.raise_error("Expected DELETE or UPDATE") 3919 3920 kind = self._prev.text.lower() 3921 3922 if self._match_text_seq("NO", "ACTION"): 3923 action = "NO ACTION" 3924 elif self._match(TokenType.SET): 3925 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3926 action = "SET " + self._prev.text.upper() 3927 else: 3928 self._advance() 3929 action = self._prev.text.upper() 3930 3931 options[kind] = action 3932 3933 return self.expression( 3934 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3935 ) 3936 3937 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 3938 return self._parse_field() 3939 3940 def _parse_primary_key( 3941 self, wrapped_optional: bool = False, in_props: bool = False 3942 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3943 desc = ( 3944 self._match_set((TokenType.ASC, TokenType.DESC)) 3945 and self._prev.token_type == TokenType.DESC 3946 ) 3947 3948 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3949 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3950 3951 expressions = self._parse_wrapped_csv( 3952 self._parse_primary_key_part, optional=wrapped_optional 3953 ) 3954 options = self._parse_key_constraint_options() 3955 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3956 3957 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3958 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3959 return this 3960 3961 bracket_kind = self._prev.token_type 3962 3963 if self._match(TokenType.COLON): 3964 expressions: t.List[exp.Expression] = [ 3965 self.expression(exp.Slice, expression=self._parse_conjunction()) 3966 ] 3967 else: 3968 expressions = self._parse_csv( 3969 lambda: self._parse_slice( 3970 self._parse_alias(self._parse_conjunction(), explicit=True) 3971 ) 3972 ) 3973 3974 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3975 if bracket_kind == TokenType.L_BRACE: 3976 this = self.expression(exp.Struct, expressions=expressions) 3977 elif not this or this.name.upper() == "ARRAY": 3978 this = self.expression(exp.Array, expressions=expressions) 3979 else: 3980 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3981 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3982 3983 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3984 self.raise_error("Expected ]") 3985 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3986 self.raise_error("Expected }") 3987 3988 self._add_comments(this) 3989 return self._parse_bracket(this) 3990 3991 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3992 if self._match(TokenType.COLON): 3993 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3994 return this 3995 3996 def _parse_case(self) -> t.Optional[exp.Expression]: 3997 ifs = [] 3998 default = None 3999 4000 comments = self._prev_comments 4001 expression = self._parse_conjunction() 4002 4003 while self._match(TokenType.WHEN): 4004 this = self._parse_conjunction() 4005 self._match(TokenType.THEN) 4006 then = self._parse_conjunction() 4007 ifs.append(self.expression(exp.If, this=this, true=then)) 4008 4009 if self._match(TokenType.ELSE): 4010 default = self._parse_conjunction() 4011 4012 if not self._match(TokenType.END): 4013 self.raise_error("Expected END after CASE", self._prev) 4014 4015 return self._parse_window( 4016 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4017 ) 4018 4019 def _parse_if(self) -> t.Optional[exp.Expression]: 4020 if self._match(TokenType.L_PAREN): 4021 args = self._parse_csv(self._parse_conjunction) 4022 this = self.validate_expression(exp.If.from_arg_list(args), args) 4023 self._match_r_paren() 4024 else: 4025 index = self._index - 1 4026 condition = self._parse_conjunction() 4027 4028 if not condition: 4029 self._retreat(index) 4030 return None 4031 4032 self._match(TokenType.THEN) 4033 true = self._parse_conjunction() 4034 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4035 self._match(TokenType.END) 4036 this = self.expression(exp.If, this=condition, true=true, false=false) 4037 4038 return self._parse_window(this) 4039 4040 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4041 if not self._match_text_seq("VALUE", "FOR"): 4042 self._retreat(self._index - 1) 4043 return None 4044 4045 return self.expression( 4046 exp.NextValueFor, 4047 this=self._parse_column(), 4048 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4049 ) 4050 4051 def _parse_extract(self) -> exp.Extract: 4052 this = self._parse_function() or self._parse_var() or self._parse_type() 4053 4054 if self._match(TokenType.FROM): 4055 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4056 4057 if not self._match(TokenType.COMMA): 4058 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4059 4060 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4061 4062 def _parse_any_value(self) -> exp.AnyValue: 4063 this = self._parse_lambda() 4064 is_max = None 4065 having = None 4066 4067 if self._match(TokenType.HAVING): 4068 self._match_texts(("MAX", "MIN")) 4069 is_max = self._prev.text == "MAX" 4070 having = self._parse_column() 4071 4072 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4073 4074 def _parse_cast(self, strict: bool) -> exp.Expression: 4075 this = self._parse_conjunction() 4076 4077 if not self._match(TokenType.ALIAS): 4078 if self._match(TokenType.COMMA): 4079 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4080 4081 self.raise_error("Expected AS after CAST") 4082 4083 fmt = None 4084 to = self._parse_types() 4085 4086 if not to: 4087 self.raise_error("Expected TYPE after CAST") 4088 elif isinstance(to, exp.Identifier): 4089 to = exp.DataType.build(to.name, udt=True) 4090 elif to.this == exp.DataType.Type.CHAR: 4091 if self._match(TokenType.CHARACTER_SET): 4092 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4093 elif self._match(TokenType.FORMAT): 4094 fmt_string = self._parse_string() 4095 fmt = self._parse_at_time_zone(fmt_string) 4096 4097 if to.this in exp.DataType.TEMPORAL_TYPES: 4098 this = self.expression( 4099 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4100 this=this, 4101 format=exp.Literal.string( 4102 format_time( 4103 fmt_string.this if fmt_string else "", 4104 self.FORMAT_MAPPING or self.TIME_MAPPING, 4105 self.FORMAT_TRIE or self.TIME_TRIE, 4106 ) 4107 ), 4108 ) 4109 4110 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4111 this.set("zone", fmt.args["zone"]) 4112 4113 return this 4114 4115 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 4116 4117 def _parse_concat(self) -> t.Optional[exp.Expression]: 4118 args = self._parse_csv(self._parse_conjunction) 4119 if self.CONCAT_NULL_OUTPUTS_STRING: 4120 args = self._ensure_string_if_null(args) 4121 4122 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4123 # we find such a call we replace it with its argument. 4124 if len(args) == 1: 4125 return args[0] 4126 4127 return self.expression( 4128 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4129 ) 4130 4131 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4132 args = self._parse_csv(self._parse_conjunction) 4133 if len(args) < 2: 4134 return self.expression(exp.ConcatWs, expressions=args) 4135 delim, *values = args 4136 if self.CONCAT_NULL_OUTPUTS_STRING: 4137 values = self._ensure_string_if_null(values) 4138 4139 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4140 4141 def _parse_string_agg(self) -> exp.Expression: 4142 if self._match(TokenType.DISTINCT): 4143 args: t.List[t.Optional[exp.Expression]] = [ 4144 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4145 ] 4146 if self._match(TokenType.COMMA): 4147 args.extend(self._parse_csv(self._parse_conjunction)) 4148 else: 4149 args = self._parse_csv(self._parse_conjunction) # type: ignore 4150 4151 index = self._index 4152 if not self._match(TokenType.R_PAREN) and args: 4153 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4154 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4155 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4156 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4157 4158 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4159 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4160 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4161 if not self._match_text_seq("WITHIN", "GROUP"): 4162 self._retreat(index) 4163 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4164 4165 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4166 order = self._parse_order(this=seq_get(args, 0)) 4167 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4168 4169 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4170 this = self._parse_bitwise() 4171 4172 if self._match(TokenType.USING): 4173 to: t.Optional[exp.Expression] = self.expression( 4174 exp.CharacterSet, this=self._parse_var() 4175 ) 4176 elif self._match(TokenType.COMMA): 4177 to = self._parse_types() 4178 else: 4179 to = None 4180 4181 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4182 4183 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4184 """ 4185 There are generally two variants of the DECODE function: 4186 4187 - DECODE(bin, charset) 4188 - DECODE(expression, search, result [, search, result] ... [, default]) 4189 4190 The second variant will always be parsed into a CASE expression. Note that NULL 4191 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4192 instead of relying on pattern matching. 4193 """ 4194 args = self._parse_csv(self._parse_conjunction) 4195 4196 if len(args) < 3: 4197 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4198 4199 expression, *expressions = args 4200 if not expression: 4201 return None 4202 4203 ifs = [] 4204 for search, result in zip(expressions[::2], expressions[1::2]): 4205 if not search or not result: 4206 return None 4207 4208 if isinstance(search, exp.Literal): 4209 ifs.append( 4210 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4211 ) 4212 elif isinstance(search, exp.Null): 4213 ifs.append( 4214 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4215 ) 4216 else: 4217 cond = exp.or_( 4218 exp.EQ(this=expression.copy(), expression=search), 4219 exp.and_( 4220 exp.Is(this=expression.copy(), expression=exp.Null()), 4221 exp.Is(this=search.copy(), expression=exp.Null()), 4222 copy=False, 4223 ), 4224 copy=False, 4225 ) 4226 ifs.append(exp.If(this=cond, true=result)) 4227 4228 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4229 4230 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4231 self._match_text_seq("KEY") 4232 key = self._parse_column() 4233 self._match_set((TokenType.COLON, TokenType.COMMA)) 4234 self._match_text_seq("VALUE") 4235 value = self._parse_bitwise() 4236 4237 if not key and not value: 4238 return None 4239 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4240 4241 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4242 if not this or not self._match_text_seq("FORMAT", "JSON"): 4243 return this 4244 4245 return self.expression(exp.FormatJson, this=this) 4246 4247 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4248 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4249 for value in values: 4250 if self._match_text_seq(value, "ON", on): 4251 return f"{value} ON {on}" 4252 4253 return None 4254 4255 def _parse_json_object(self) -> exp.JSONObject: 4256 star = self._parse_star() 4257 expressions = ( 4258 [star] 4259 if star 4260 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4261 ) 4262 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4263 4264 unique_keys = None 4265 if self._match_text_seq("WITH", "UNIQUE"): 4266 unique_keys = True 4267 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4268 unique_keys = False 4269 4270 self._match_text_seq("KEYS") 4271 4272 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4273 self._parse_type() 4274 ) 4275 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4276 4277 return self.expression( 4278 exp.JSONObject, 4279 expressions=expressions, 4280 null_handling=null_handling, 4281 unique_keys=unique_keys, 4282 return_type=return_type, 4283 encoding=encoding, 4284 ) 4285 4286 def _parse_logarithm(self) -> exp.Func: 4287 # Default argument order is base, expression 4288 args = self._parse_csv(self._parse_range) 4289 4290 if len(args) > 1: 4291 if not self.LOG_BASE_FIRST: 4292 args.reverse() 4293 return exp.Log.from_arg_list(args) 4294 4295 return self.expression( 4296 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4297 ) 4298 4299 def _parse_match_against(self) -> exp.MatchAgainst: 4300 expressions = self._parse_csv(self._parse_column) 4301 4302 self._match_text_seq(")", "AGAINST", "(") 4303 4304 this = self._parse_string() 4305 4306 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4307 modifier = "IN NATURAL LANGUAGE MODE" 4308 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4309 modifier = f"{modifier} WITH QUERY EXPANSION" 4310 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4311 modifier = "IN BOOLEAN MODE" 4312 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4313 modifier = "WITH QUERY EXPANSION" 4314 else: 4315 modifier = None 4316 4317 return self.expression( 4318 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4319 ) 4320 4321 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4322 def _parse_open_json(self) -> exp.OpenJSON: 4323 this = self._parse_bitwise() 4324 path = self._match(TokenType.COMMA) and self._parse_string() 4325 4326 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4327 this = self._parse_field(any_token=True) 4328 kind = self._parse_types() 4329 path = self._parse_string() 4330 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4331 4332 return self.expression( 4333 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4334 ) 4335 4336 expressions = None 4337 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4338 self._match_l_paren() 4339 expressions = self._parse_csv(_parse_open_json_column_def) 4340 4341 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4342 4343 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4344 args = self._parse_csv(self._parse_bitwise) 4345 4346 if self._match(TokenType.IN): 4347 return self.expression( 4348 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4349 ) 4350 4351 if haystack_first: 4352 haystack = seq_get(args, 0) 4353 needle = seq_get(args, 1) 4354 else: 4355 needle = seq_get(args, 0) 4356 haystack = seq_get(args, 1) 4357 4358 return self.expression( 4359 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4360 ) 4361 4362 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4363 args = self._parse_csv(self._parse_table) 4364 return exp.JoinHint(this=func_name.upper(), expressions=args) 4365 4366 def _parse_substring(self) -> exp.Substring: 4367 # Postgres supports the form: substring(string [from int] [for int]) 4368 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4369 4370 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4371 4372 if self._match(TokenType.FROM): 4373 args.append(self._parse_bitwise()) 4374 if self._match(TokenType.FOR): 4375 args.append(self._parse_bitwise()) 4376 4377 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4378 4379 def _parse_trim(self) -> exp.Trim: 4380 # https://www.w3resource.com/sql/character-functions/trim.php 4381 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4382 4383 position = None 4384 collation = None 4385 4386 if self._match_texts(self.TRIM_TYPES): 4387 position = self._prev.text.upper() 4388 4389 expression = self._parse_bitwise() 4390 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4391 this = self._parse_bitwise() 4392 else: 4393 this = expression 4394 expression = None 4395 4396 if self._match(TokenType.COLLATE): 4397 collation = self._parse_bitwise() 4398 4399 return self.expression( 4400 exp.Trim, this=this, position=position, expression=expression, collation=collation 4401 ) 4402 4403 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4404 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4405 4406 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4407 return self._parse_window(self._parse_id_var(), alias=True) 4408 4409 def _parse_respect_or_ignore_nulls( 4410 self, this: t.Optional[exp.Expression] 4411 ) -> t.Optional[exp.Expression]: 4412 if self._match_text_seq("IGNORE", "NULLS"): 4413 return self.expression(exp.IgnoreNulls, this=this) 4414 if self._match_text_seq("RESPECT", "NULLS"): 4415 return self.expression(exp.RespectNulls, this=this) 4416 return this 4417 4418 def _parse_window( 4419 self, this: t.Optional[exp.Expression], alias: bool = False 4420 ) -> t.Optional[exp.Expression]: 4421 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4422 self._match(TokenType.WHERE) 4423 this = self.expression( 4424 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4425 ) 4426 self._match_r_paren() 4427 4428 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4429 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4430 if self._match_text_seq("WITHIN", "GROUP"): 4431 order = self._parse_wrapped(self._parse_order) 4432 this = self.expression(exp.WithinGroup, this=this, expression=order) 4433 4434 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4435 # Some dialects choose to implement and some do not. 4436 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4437 4438 # There is some code above in _parse_lambda that handles 4439 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4440 4441 # The below changes handle 4442 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4443 4444 # Oracle allows both formats 4445 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4446 # and Snowflake chose to do the same for familiarity 4447 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4448 this = self._parse_respect_or_ignore_nulls(this) 4449 4450 # bigquery select from window x AS (partition by ...) 4451 if alias: 4452 over = None 4453 self._match(TokenType.ALIAS) 4454 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4455 return this 4456 else: 4457 over = self._prev.text.upper() 4458 4459 if not self._match(TokenType.L_PAREN): 4460 return self.expression( 4461 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4462 ) 4463 4464 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4465 4466 first = self._match(TokenType.FIRST) 4467 if self._match_text_seq("LAST"): 4468 first = False 4469 4470 partition, order = self._parse_partition_and_order() 4471 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4472 4473 if kind: 4474 self._match(TokenType.BETWEEN) 4475 start = self._parse_window_spec() 4476 self._match(TokenType.AND) 4477 end = self._parse_window_spec() 4478 4479 spec = self.expression( 4480 exp.WindowSpec, 4481 kind=kind, 4482 start=start["value"], 4483 start_side=start["side"], 4484 end=end["value"], 4485 end_side=end["side"], 4486 ) 4487 else: 4488 spec = None 4489 4490 self._match_r_paren() 4491 4492 window = self.expression( 4493 exp.Window, 4494 this=this, 4495 partition_by=partition, 4496 order=order, 4497 spec=spec, 4498 alias=window_alias, 4499 over=over, 4500 first=first, 4501 ) 4502 4503 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4504 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4505 return self._parse_window(window, alias=alias) 4506 4507 return window 4508 4509 def _parse_partition_and_order( 4510 self, 4511 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4512 return self._parse_partition_by(), self._parse_order() 4513 4514 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4515 self._match(TokenType.BETWEEN) 4516 4517 return { 4518 "value": ( 4519 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4520 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4521 or self._parse_bitwise() 4522 ), 4523 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4524 } 4525 4526 def _parse_alias( 4527 self, this: t.Optional[exp.Expression], explicit: bool = False 4528 ) -> t.Optional[exp.Expression]: 4529 any_token = self._match(TokenType.ALIAS) 4530 4531 if explicit and not any_token: 4532 return this 4533 4534 if self._match(TokenType.L_PAREN): 4535 aliases = self.expression( 4536 exp.Aliases, 4537 this=this, 4538 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4539 ) 4540 self._match_r_paren(aliases) 4541 return aliases 4542 4543 alias = self._parse_id_var(any_token) 4544 4545 if alias: 4546 return self.expression(exp.Alias, this=this, alias=alias) 4547 4548 return this 4549 4550 def _parse_id_var( 4551 self, 4552 any_token: bool = True, 4553 tokens: t.Optional[t.Collection[TokenType]] = None, 4554 ) -> t.Optional[exp.Expression]: 4555 identifier = self._parse_identifier() 4556 4557 if identifier: 4558 return identifier 4559 4560 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4561 quoted = self._prev.token_type == TokenType.STRING 4562 return exp.Identifier(this=self._prev.text, quoted=quoted) 4563 4564 return None 4565 4566 def _parse_string(self) -> t.Optional[exp.Expression]: 4567 if self._match(TokenType.STRING): 4568 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4569 return self._parse_placeholder() 4570 4571 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4572 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4573 4574 def _parse_number(self) -> t.Optional[exp.Expression]: 4575 if self._match(TokenType.NUMBER): 4576 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4577 return self._parse_placeholder() 4578 4579 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4580 if self._match(TokenType.IDENTIFIER): 4581 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4582 return self._parse_placeholder() 4583 4584 def _parse_var( 4585 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4586 ) -> t.Optional[exp.Expression]: 4587 if ( 4588 (any_token and self._advance_any()) 4589 or self._match(TokenType.VAR) 4590 or (self._match_set(tokens) if tokens else False) 4591 ): 4592 return self.expression(exp.Var, this=self._prev.text) 4593 return self._parse_placeholder() 4594 4595 def _advance_any(self) -> t.Optional[Token]: 4596 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4597 self._advance() 4598 return self._prev 4599 return None 4600 4601 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4602 return self._parse_var() or self._parse_string() 4603 4604 def _parse_null(self) -> t.Optional[exp.Expression]: 4605 if self._match_set(self.NULL_TOKENS): 4606 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4607 return self._parse_placeholder() 4608 4609 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4610 if self._match(TokenType.TRUE): 4611 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4612 if self._match(TokenType.FALSE): 4613 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4614 return self._parse_placeholder() 4615 4616 def _parse_star(self) -> t.Optional[exp.Expression]: 4617 if self._match(TokenType.STAR): 4618 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4619 return self._parse_placeholder() 4620 4621 def _parse_parameter(self) -> exp.Parameter: 4622 wrapped = self._match(TokenType.L_BRACE) 4623 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4624 self._match(TokenType.R_BRACE) 4625 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4626 4627 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4628 if self._match_set(self.PLACEHOLDER_PARSERS): 4629 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4630 if placeholder: 4631 return placeholder 4632 self._advance(-1) 4633 return None 4634 4635 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4636 if not self._match(TokenType.EXCEPT): 4637 return None 4638 if self._match(TokenType.L_PAREN, advance=False): 4639 return self._parse_wrapped_csv(self._parse_column) 4640 4641 except_column = self._parse_column() 4642 return [except_column] if except_column else None 4643 4644 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4645 if not self._match(TokenType.REPLACE): 4646 return None 4647 if self._match(TokenType.L_PAREN, advance=False): 4648 return self._parse_wrapped_csv(self._parse_expression) 4649 4650 replace_expression = self._parse_expression() 4651 return [replace_expression] if replace_expression else None 4652 4653 def _parse_csv( 4654 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4655 ) -> t.List[exp.Expression]: 4656 parse_result = parse_method() 4657 items = [parse_result] if parse_result is not None else [] 4658 4659 while self._match(sep): 4660 self._add_comments(parse_result) 4661 parse_result = parse_method() 4662 if parse_result is not None: 4663 items.append(parse_result) 4664 4665 return items 4666 4667 def _parse_tokens( 4668 self, parse_method: t.Callable, expressions: t.Dict 4669 ) -> t.Optional[exp.Expression]: 4670 this = parse_method() 4671 4672 while self._match_set(expressions): 4673 this = self.expression( 4674 expressions[self._prev.token_type], 4675 this=this, 4676 comments=self._prev_comments, 4677 expression=parse_method(), 4678 ) 4679 4680 return this 4681 4682 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4683 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4684 4685 def _parse_wrapped_csv( 4686 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4687 ) -> t.List[exp.Expression]: 4688 return self._parse_wrapped( 4689 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4690 ) 4691 4692 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4693 wrapped = self._match(TokenType.L_PAREN) 4694 if not wrapped and not optional: 4695 self.raise_error("Expecting (") 4696 parse_result = parse_method() 4697 if wrapped: 4698 self._match_r_paren() 4699 return parse_result 4700 4701 def _parse_expressions(self) -> t.List[exp.Expression]: 4702 return self._parse_csv(self._parse_expression) 4703 4704 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4705 return self._parse_select() or self._parse_set_operations( 4706 self._parse_expression() if alias else self._parse_conjunction() 4707 ) 4708 4709 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4710 return self._parse_query_modifiers( 4711 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4712 ) 4713 4714 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4715 this = None 4716 if self._match_texts(self.TRANSACTION_KIND): 4717 this = self._prev.text 4718 4719 self._match_texts({"TRANSACTION", "WORK"}) 4720 4721 modes = [] 4722 while True: 4723 mode = [] 4724 while self._match(TokenType.VAR): 4725 mode.append(self._prev.text) 4726 4727 if mode: 4728 modes.append(" ".join(mode)) 4729 if not self._match(TokenType.COMMA): 4730 break 4731 4732 return self.expression(exp.Transaction, this=this, modes=modes) 4733 4734 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4735 chain = None 4736 savepoint = None 4737 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4738 4739 self._match_texts({"TRANSACTION", "WORK"}) 4740 4741 if self._match_text_seq("TO"): 4742 self._match_text_seq("SAVEPOINT") 4743 savepoint = self._parse_id_var() 4744 4745 if self._match(TokenType.AND): 4746 chain = not self._match_text_seq("NO") 4747 self._match_text_seq("CHAIN") 4748 4749 if is_rollback: 4750 return self.expression(exp.Rollback, savepoint=savepoint) 4751 4752 return self.expression(exp.Commit, chain=chain) 4753 4754 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4755 if not self._match_text_seq("ADD"): 4756 return None 4757 4758 self._match(TokenType.COLUMN) 4759 exists_column = self._parse_exists(not_=True) 4760 expression = self._parse_field_def() 4761 4762 if expression: 4763 expression.set("exists", exists_column) 4764 4765 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4766 if self._match_texts(("FIRST", "AFTER")): 4767 position = self._prev.text 4768 column_position = self.expression( 4769 exp.ColumnPosition, this=self._parse_column(), position=position 4770 ) 4771 expression.set("position", column_position) 4772 4773 return expression 4774 4775 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4776 drop = self._match(TokenType.DROP) and self._parse_drop() 4777 if drop and not isinstance(drop, exp.Command): 4778 drop.set("kind", drop.args.get("kind", "COLUMN")) 4779 return drop 4780 4781 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4782 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4783 return self.expression( 4784 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4785 ) 4786 4787 def _parse_add_constraint(self) -> exp.AddConstraint: 4788 this = None 4789 kind = self._prev.token_type 4790 4791 if kind == TokenType.CONSTRAINT: 4792 this = self._parse_id_var() 4793 4794 if self._match_text_seq("CHECK"): 4795 expression = self._parse_wrapped(self._parse_conjunction) 4796 enforced = self._match_text_seq("ENFORCED") 4797 4798 return self.expression( 4799 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4800 ) 4801 4802 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4803 expression = self._parse_foreign_key() 4804 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4805 expression = self._parse_primary_key() 4806 else: 4807 expression = None 4808 4809 return self.expression(exp.AddConstraint, this=this, expression=expression) 4810 4811 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4812 index = self._index - 1 4813 4814 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4815 return self._parse_csv(self._parse_add_constraint) 4816 4817 self._retreat(index) 4818 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4819 return self._parse_csv(self._parse_field_def) 4820 4821 return self._parse_csv(self._parse_add_column) 4822 4823 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4824 self._match(TokenType.COLUMN) 4825 column = self._parse_field(any_token=True) 4826 4827 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4828 return self.expression(exp.AlterColumn, this=column, drop=True) 4829 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4830 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4831 4832 self._match_text_seq("SET", "DATA") 4833 return self.expression( 4834 exp.AlterColumn, 4835 this=column, 4836 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4837 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4838 using=self._match(TokenType.USING) and self._parse_conjunction(), 4839 ) 4840 4841 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4842 index = self._index - 1 4843 4844 partition_exists = self._parse_exists() 4845 if self._match(TokenType.PARTITION, advance=False): 4846 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4847 4848 self._retreat(index) 4849 return self._parse_csv(self._parse_drop_column) 4850 4851 def _parse_alter_table_rename(self) -> exp.RenameTable: 4852 self._match_text_seq("TO") 4853 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4854 4855 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4856 start = self._prev 4857 4858 if not self._match(TokenType.TABLE): 4859 return self._parse_as_command(start) 4860 4861 exists = self._parse_exists() 4862 only = self._match_text_seq("ONLY") 4863 this = self._parse_table(schema=True) 4864 4865 if self._next: 4866 self._advance() 4867 4868 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4869 if parser: 4870 actions = ensure_list(parser(self)) 4871 4872 if not self._curr: 4873 return self.expression( 4874 exp.AlterTable, 4875 this=this, 4876 exists=exists, 4877 actions=actions, 4878 only=only, 4879 ) 4880 4881 return self._parse_as_command(start) 4882 4883 def _parse_merge(self) -> exp.Merge: 4884 self._match(TokenType.INTO) 4885 target = self._parse_table() 4886 4887 if target and self._match(TokenType.ALIAS, advance=False): 4888 target.set("alias", self._parse_table_alias()) 4889 4890 self._match(TokenType.USING) 4891 using = self._parse_table() 4892 4893 self._match(TokenType.ON) 4894 on = self._parse_conjunction() 4895 4896 whens = [] 4897 while self._match(TokenType.WHEN): 4898 matched = not self._match(TokenType.NOT) 4899 self._match_text_seq("MATCHED") 4900 source = ( 4901 False 4902 if self._match_text_seq("BY", "TARGET") 4903 else self._match_text_seq("BY", "SOURCE") 4904 ) 4905 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4906 4907 self._match(TokenType.THEN) 4908 4909 if self._match(TokenType.INSERT): 4910 _this = self._parse_star() 4911 if _this: 4912 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4913 else: 4914 then = self.expression( 4915 exp.Insert, 4916 this=self._parse_value(), 4917 expression=self._match(TokenType.VALUES) and self._parse_value(), 4918 ) 4919 elif self._match(TokenType.UPDATE): 4920 expressions = self._parse_star() 4921 if expressions: 4922 then = self.expression(exp.Update, expressions=expressions) 4923 else: 4924 then = self.expression( 4925 exp.Update, 4926 expressions=self._match(TokenType.SET) 4927 and self._parse_csv(self._parse_equality), 4928 ) 4929 elif self._match(TokenType.DELETE): 4930 then = self.expression(exp.Var, this=self._prev.text) 4931 else: 4932 then = None 4933 4934 whens.append( 4935 self.expression( 4936 exp.When, 4937 matched=matched, 4938 source=source, 4939 condition=condition, 4940 then=then, 4941 ) 4942 ) 4943 4944 return self.expression( 4945 exp.Merge, 4946 this=target, 4947 using=using, 4948 on=on, 4949 expressions=whens, 4950 ) 4951 4952 def _parse_show(self) -> t.Optional[exp.Expression]: 4953 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4954 if parser: 4955 return parser(self) 4956 return self._parse_as_command(self._prev) 4957 4958 def _parse_set_item_assignment( 4959 self, kind: t.Optional[str] = None 4960 ) -> t.Optional[exp.Expression]: 4961 index = self._index 4962 4963 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4964 return self._parse_set_transaction(global_=kind == "GLOBAL") 4965 4966 left = self._parse_primary() or self._parse_id_var() 4967 assignment_delimiter = self._match_texts(("=", "TO")) 4968 4969 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 4970 self._retreat(index) 4971 return None 4972 4973 right = self._parse_statement() or self._parse_id_var() 4974 this = self.expression(exp.EQ, this=left, expression=right) 4975 4976 return self.expression(exp.SetItem, this=this, kind=kind) 4977 4978 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4979 self._match_text_seq("TRANSACTION") 4980 characteristics = self._parse_csv( 4981 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4982 ) 4983 return self.expression( 4984 exp.SetItem, 4985 expressions=characteristics, 4986 kind="TRANSACTION", 4987 **{"global": global_}, # type: ignore 4988 ) 4989 4990 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4991 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4992 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4993 4994 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4995 index = self._index 4996 set_ = self.expression( 4997 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4998 ) 4999 5000 if self._curr: 5001 self._retreat(index) 5002 return self._parse_as_command(self._prev) 5003 5004 return set_ 5005 5006 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5007 for option in options: 5008 if self._match_text_seq(*option.split(" ")): 5009 return exp.var(option) 5010 return None 5011 5012 def _parse_as_command(self, start: Token) -> exp.Command: 5013 while self._curr: 5014 self._advance() 5015 text = self._find_sql(start, self._prev) 5016 size = len(start.text) 5017 return exp.Command(this=text[:size], expression=text[size:]) 5018 5019 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5020 settings = [] 5021 5022 self._match_l_paren() 5023 kind = self._parse_id_var() 5024 5025 if self._match(TokenType.L_PAREN): 5026 while True: 5027 key = self._parse_id_var() 5028 value = self._parse_primary() 5029 5030 if not key and value is None: 5031 break 5032 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5033 self._match(TokenType.R_PAREN) 5034 5035 self._match_r_paren() 5036 5037 return self.expression( 5038 exp.DictProperty, 5039 this=this, 5040 kind=kind.this if kind else None, 5041 settings=settings, 5042 ) 5043 5044 def _parse_dict_range(self, this: str) -> exp.DictRange: 5045 self._match_l_paren() 5046 has_min = self._match_text_seq("MIN") 5047 if has_min: 5048 min = self._parse_var() or self._parse_primary() 5049 self._match_text_seq("MAX") 5050 max = self._parse_var() or self._parse_primary() 5051 else: 5052 max = self._parse_var() or self._parse_primary() 5053 min = exp.Literal.number(0) 5054 self._match_r_paren() 5055 return self.expression(exp.DictRange, this=this, min=min, max=max) 5056 5057 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5058 index = self._index 5059 expression = self._parse_column() 5060 if not self._match(TokenType.IN): 5061 self._retreat(index - 1) 5062 return None 5063 iterator = self._parse_column() 5064 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5065 return self.expression( 5066 exp.Comprehension, 5067 this=this, 5068 expression=expression, 5069 iterator=iterator, 5070 condition=condition, 5071 ) 5072 5073 def _find_parser( 5074 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5075 ) -> t.Optional[t.Callable]: 5076 if not self._curr: 5077 return None 5078 5079 index = self._index 5080 this = [] 5081 while True: 5082 # The current token might be multiple words 5083 curr = self._curr.text.upper() 5084 key = curr.split(" ") 5085 this.append(curr) 5086 5087 self._advance() 5088 result, trie = in_trie(trie, key) 5089 if result == TrieResult.FAILED: 5090 break 5091 5092 if result == TrieResult.EXISTS: 5093 subparser = parsers[" ".join(this)] 5094 return subparser 5095 5096 self._retreat(index) 5097 return None 5098 5099 def _match(self, token_type, advance=True, expression=None): 5100 if not self._curr: 5101 return None 5102 5103 if self._curr.token_type == token_type: 5104 if advance: 5105 self._advance() 5106 self._add_comments(expression) 5107 return True 5108 5109 return None 5110 5111 def _match_set(self, types, advance=True): 5112 if not self._curr: 5113 return None 5114 5115 if self._curr.token_type in types: 5116 if advance: 5117 self._advance() 5118 return True 5119 5120 return None 5121 5122 def _match_pair(self, token_type_a, token_type_b, advance=True): 5123 if not self._curr or not self._next: 5124 return None 5125 5126 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5127 if advance: 5128 self._advance(2) 5129 return True 5130 5131 return None 5132 5133 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5134 if not self._match(TokenType.L_PAREN, expression=expression): 5135 self.raise_error("Expecting (") 5136 5137 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5138 if not self._match(TokenType.R_PAREN, expression=expression): 5139 self.raise_error("Expecting )") 5140 5141 def _match_texts(self, texts, advance=True): 5142 if self._curr and self._curr.text.upper() in texts: 5143 if advance: 5144 self._advance() 5145 return True 5146 return False 5147 5148 def _match_text_seq(self, *texts, advance=True): 5149 index = self._index 5150 for text in texts: 5151 if self._curr and self._curr.text.upper() == text: 5152 self._advance() 5153 else: 5154 self._retreat(index) 5155 return False 5156 5157 if not advance: 5158 self._retreat(index) 5159 5160 return True 5161 5162 @t.overload 5163 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5164 ... 5165 5166 @t.overload 5167 def _replace_columns_with_dots( 5168 self, this: t.Optional[exp.Expression] 5169 ) -> t.Optional[exp.Expression]: 5170 ... 5171 5172 def _replace_columns_with_dots(self, this): 5173 if isinstance(this, exp.Dot): 5174 exp.replace_children(this, self._replace_columns_with_dots) 5175 elif isinstance(this, exp.Column): 5176 exp.replace_children(this, self._replace_columns_with_dots) 5177 table = this.args.get("table") 5178 this = ( 5179 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5180 ) 5181 5182 return this 5183 5184 def _replace_lambda( 5185 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5186 ) -> t.Optional[exp.Expression]: 5187 if not node: 5188 return node 5189 5190 for column in node.find_all(exp.Column): 5191 if column.parts[0].name in lambda_variables: 5192 dot_or_id = column.to_dot() if column.table else column.this 5193 parent = column.parent 5194 5195 while isinstance(parent, exp.Dot): 5196 if not isinstance(parent.parent, exp.Dot): 5197 parent.replace(dot_or_id) 5198 break 5199 parent = parent.parent 5200 else: 5201 if column is node: 5202 node = dot_or_id 5203 else: 5204 column.replace(dot_or_id) 5205 return node 5206 5207 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5208 return [ 5209 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5210 for value in values 5211 if value 5212 ]
21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMPTZ, 165 TokenType.TIMESTAMPLTZ, 166 TokenType.DATETIME, 167 TokenType.DATETIME64, 168 TokenType.DATE, 169 TokenType.INT4RANGE, 170 TokenType.INT4MULTIRANGE, 171 TokenType.INT8RANGE, 172 TokenType.INT8MULTIRANGE, 173 TokenType.NUMRANGE, 174 TokenType.NUMMULTIRANGE, 175 TokenType.TSRANGE, 176 TokenType.TSMULTIRANGE, 177 TokenType.TSTZRANGE, 178 TokenType.TSTZMULTIRANGE, 179 TokenType.DATERANGE, 180 TokenType.DATEMULTIRANGE, 181 TokenType.DECIMAL, 182 TokenType.BIGDECIMAL, 183 TokenType.UUID, 184 TokenType.GEOGRAPHY, 185 TokenType.GEOMETRY, 186 TokenType.HLLSKETCH, 187 TokenType.HSTORE, 188 TokenType.PSEUDO_TYPE, 189 TokenType.SUPER, 190 TokenType.SERIAL, 191 TokenType.SMALLSERIAL, 192 TokenType.BIGSERIAL, 193 TokenType.XML, 194 TokenType.YEAR, 195 TokenType.UNIQUEIDENTIFIER, 196 TokenType.USERDEFINED, 197 TokenType.MONEY, 198 TokenType.SMALLMONEY, 199 TokenType.ROWVERSION, 200 TokenType.IMAGE, 201 TokenType.VARIANT, 202 TokenType.OBJECT, 203 TokenType.OBJECT_IDENTIFIER, 204 TokenType.INET, 205 TokenType.IPADDRESS, 206 TokenType.IPPREFIX, 207 TokenType.UNKNOWN, 208 TokenType.NULL, 209 *ENUM_TYPE_TOKENS, 210 *NESTED_TYPE_TOKENS, 211 } 212 213 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 214 TokenType.BIGINT: TokenType.UBIGINT, 215 TokenType.INT: TokenType.UINT, 216 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 217 TokenType.SMALLINT: TokenType.USMALLINT, 218 TokenType.TINYINT: TokenType.UTINYINT, 219 } 220 221 SUBQUERY_PREDICATES = { 222 TokenType.ANY: exp.Any, 223 TokenType.ALL: exp.All, 224 TokenType.EXISTS: exp.Exists, 225 TokenType.SOME: exp.Any, 226 } 227 228 RESERVED_KEYWORDS = { 229 *Tokenizer.SINGLE_TOKENS.values(), 230 TokenType.SELECT, 231 } 232 233 DB_CREATABLES = { 234 TokenType.DATABASE, 235 TokenType.SCHEMA, 236 TokenType.TABLE, 237 TokenType.VIEW, 238 TokenType.DICTIONARY, 239 } 240 241 CREATABLES = { 242 TokenType.COLUMN, 243 TokenType.FUNCTION, 244 TokenType.INDEX, 245 TokenType.PROCEDURE, 246 *DB_CREATABLES, 247 } 248 249 # Tokens that can represent identifiers 250 ID_VAR_TOKENS = { 251 TokenType.VAR, 252 TokenType.ANTI, 253 TokenType.APPLY, 254 TokenType.ASC, 255 TokenType.AUTO_INCREMENT, 256 TokenType.BEGIN, 257 TokenType.CACHE, 258 TokenType.CASE, 259 TokenType.COLLATE, 260 TokenType.COMMAND, 261 TokenType.COMMENT, 262 TokenType.COMMIT, 263 TokenType.CONSTRAINT, 264 TokenType.DEFAULT, 265 TokenType.DELETE, 266 TokenType.DESC, 267 TokenType.DESCRIBE, 268 TokenType.DICTIONARY, 269 TokenType.DIV, 270 TokenType.END, 271 TokenType.EXECUTE, 272 TokenType.ESCAPE, 273 TokenType.FALSE, 274 TokenType.FIRST, 275 TokenType.FILTER, 276 TokenType.FORMAT, 277 TokenType.FULL, 278 TokenType.IS, 279 TokenType.ISNULL, 280 TokenType.INTERVAL, 281 TokenType.KEEP, 282 TokenType.KILL, 283 TokenType.LEFT, 284 TokenType.LOAD, 285 TokenType.MERGE, 286 TokenType.NATURAL, 287 TokenType.NEXT, 288 TokenType.OFFSET, 289 TokenType.ORDINALITY, 290 TokenType.OVERLAPS, 291 TokenType.OVERWRITE, 292 TokenType.PARTITION, 293 TokenType.PERCENT, 294 TokenType.PIVOT, 295 TokenType.PRAGMA, 296 TokenType.RANGE, 297 TokenType.REFERENCES, 298 TokenType.RIGHT, 299 TokenType.ROW, 300 TokenType.ROWS, 301 TokenType.SEMI, 302 TokenType.SET, 303 TokenType.SETTINGS, 304 TokenType.SHOW, 305 TokenType.TEMPORARY, 306 TokenType.TOP, 307 TokenType.TRUE, 308 TokenType.UNIQUE, 309 TokenType.UNPIVOT, 310 TokenType.UPDATE, 311 TokenType.VOLATILE, 312 TokenType.WINDOW, 313 *CREATABLES, 314 *SUBQUERY_PREDICATES, 315 *TYPE_TOKENS, 316 *NO_PAREN_FUNCTIONS, 317 } 318 319 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 320 321 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 322 TokenType.ANTI, 323 TokenType.APPLY, 324 TokenType.ASOF, 325 TokenType.FULL, 326 TokenType.LEFT, 327 TokenType.LOCK, 328 TokenType.NATURAL, 329 TokenType.OFFSET, 330 TokenType.RIGHT, 331 TokenType.SEMI, 332 TokenType.WINDOW, 333 } 334 335 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 336 337 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 338 339 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 340 341 FUNC_TOKENS = { 342 TokenType.COMMAND, 343 TokenType.CURRENT_DATE, 344 TokenType.CURRENT_DATETIME, 345 TokenType.CURRENT_TIMESTAMP, 346 TokenType.CURRENT_TIME, 347 TokenType.CURRENT_USER, 348 TokenType.FILTER, 349 TokenType.FIRST, 350 TokenType.FORMAT, 351 TokenType.GLOB, 352 TokenType.IDENTIFIER, 353 TokenType.INDEX, 354 TokenType.ISNULL, 355 TokenType.ILIKE, 356 TokenType.INSERT, 357 TokenType.LIKE, 358 TokenType.MERGE, 359 TokenType.OFFSET, 360 TokenType.PRIMARY_KEY, 361 TokenType.RANGE, 362 TokenType.REPLACE, 363 TokenType.RLIKE, 364 TokenType.ROW, 365 TokenType.UNNEST, 366 TokenType.VAR, 367 TokenType.LEFT, 368 TokenType.RIGHT, 369 TokenType.DATE, 370 TokenType.DATETIME, 371 TokenType.TABLE, 372 TokenType.TIMESTAMP, 373 TokenType.TIMESTAMPTZ, 374 TokenType.WINDOW, 375 TokenType.XOR, 376 *TYPE_TOKENS, 377 *SUBQUERY_PREDICATES, 378 } 379 380 CONJUNCTION = { 381 TokenType.AND: exp.And, 382 TokenType.OR: exp.Or, 383 } 384 385 EQUALITY = { 386 TokenType.EQ: exp.EQ, 387 TokenType.NEQ: exp.NEQ, 388 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 389 } 390 391 COMPARISON = { 392 TokenType.GT: exp.GT, 393 TokenType.GTE: exp.GTE, 394 TokenType.LT: exp.LT, 395 TokenType.LTE: exp.LTE, 396 } 397 398 BITWISE = { 399 TokenType.AMP: exp.BitwiseAnd, 400 TokenType.CARET: exp.BitwiseXor, 401 TokenType.PIPE: exp.BitwiseOr, 402 TokenType.DPIPE: exp.DPipe, 403 } 404 405 TERM = { 406 TokenType.DASH: exp.Sub, 407 TokenType.PLUS: exp.Add, 408 TokenType.MOD: exp.Mod, 409 TokenType.COLLATE: exp.Collate, 410 } 411 412 FACTOR = { 413 TokenType.DIV: exp.IntDiv, 414 TokenType.LR_ARROW: exp.Distance, 415 TokenType.SLASH: exp.Div, 416 TokenType.STAR: exp.Mul, 417 } 418 419 TIMES = { 420 TokenType.TIME, 421 TokenType.TIMETZ, 422 } 423 424 TIMESTAMPS = { 425 TokenType.TIMESTAMP, 426 TokenType.TIMESTAMPTZ, 427 TokenType.TIMESTAMPLTZ, 428 *TIMES, 429 } 430 431 SET_OPERATIONS = { 432 TokenType.UNION, 433 TokenType.INTERSECT, 434 TokenType.EXCEPT, 435 } 436 437 JOIN_METHODS = { 438 TokenType.NATURAL, 439 TokenType.ASOF, 440 } 441 442 JOIN_SIDES = { 443 TokenType.LEFT, 444 TokenType.RIGHT, 445 TokenType.FULL, 446 } 447 448 JOIN_KINDS = { 449 TokenType.INNER, 450 TokenType.OUTER, 451 TokenType.CROSS, 452 TokenType.SEMI, 453 TokenType.ANTI, 454 } 455 456 JOIN_HINTS: t.Set[str] = set() 457 458 LAMBDAS = { 459 TokenType.ARROW: lambda self, expressions: self.expression( 460 exp.Lambda, 461 this=self._replace_lambda( 462 self._parse_conjunction(), 463 {node.name for node in expressions}, 464 ), 465 expressions=expressions, 466 ), 467 TokenType.FARROW: lambda self, expressions: self.expression( 468 exp.Kwarg, 469 this=exp.var(expressions[0].name), 470 expression=self._parse_conjunction(), 471 ), 472 } 473 474 COLUMN_OPERATORS = { 475 TokenType.DOT: None, 476 TokenType.DCOLON: lambda self, this, to: self.expression( 477 exp.Cast if self.STRICT_CAST else exp.TryCast, 478 this=this, 479 to=to, 480 ), 481 TokenType.ARROW: lambda self, this, path: self.expression( 482 exp.JSONExtract, 483 this=this, 484 expression=path, 485 ), 486 TokenType.DARROW: lambda self, this, path: self.expression( 487 exp.JSONExtractScalar, 488 this=this, 489 expression=path, 490 ), 491 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 492 exp.JSONBExtract, 493 this=this, 494 expression=path, 495 ), 496 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 497 exp.JSONBExtractScalar, 498 this=this, 499 expression=path, 500 ), 501 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 502 exp.JSONBContains, 503 this=this, 504 expression=key, 505 ), 506 } 507 508 EXPRESSION_PARSERS = { 509 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 510 exp.Column: lambda self: self._parse_column(), 511 exp.Condition: lambda self: self._parse_conjunction(), 512 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 513 exp.Expression: lambda self: self._parse_statement(), 514 exp.From: lambda self: self._parse_from(), 515 exp.Group: lambda self: self._parse_group(), 516 exp.Having: lambda self: self._parse_having(), 517 exp.Identifier: lambda self: self._parse_id_var(), 518 exp.Join: lambda self: self._parse_join(), 519 exp.Lambda: lambda self: self._parse_lambda(), 520 exp.Lateral: lambda self: self._parse_lateral(), 521 exp.Limit: lambda self: self._parse_limit(), 522 exp.Offset: lambda self: self._parse_offset(), 523 exp.Order: lambda self: self._parse_order(), 524 exp.Ordered: lambda self: self._parse_ordered(), 525 exp.Properties: lambda self: self._parse_properties(), 526 exp.Qualify: lambda self: self._parse_qualify(), 527 exp.Returning: lambda self: self._parse_returning(), 528 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 529 exp.Table: lambda self: self._parse_table_parts(), 530 exp.TableAlias: lambda self: self._parse_table_alias(), 531 exp.Where: lambda self: self._parse_where(), 532 exp.Window: lambda self: self._parse_named_window(), 533 exp.With: lambda self: self._parse_with(), 534 "JOIN_TYPE": lambda self: self._parse_join_parts(), 535 } 536 537 STATEMENT_PARSERS = { 538 TokenType.ALTER: lambda self: self._parse_alter(), 539 TokenType.BEGIN: lambda self: self._parse_transaction(), 540 TokenType.CACHE: lambda self: self._parse_cache(), 541 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 542 TokenType.COMMENT: lambda self: self._parse_comment(), 543 TokenType.CREATE: lambda self: self._parse_create(), 544 TokenType.DELETE: lambda self: self._parse_delete(), 545 TokenType.DESC: lambda self: self._parse_describe(), 546 TokenType.DESCRIBE: lambda self: self._parse_describe(), 547 TokenType.DROP: lambda self: self._parse_drop(), 548 TokenType.INSERT: lambda self: self._parse_insert(), 549 TokenType.KILL: lambda self: self._parse_kill(), 550 TokenType.LOAD: lambda self: self._parse_load(), 551 TokenType.MERGE: lambda self: self._parse_merge(), 552 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 553 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 554 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 555 TokenType.SET: lambda self: self._parse_set(), 556 TokenType.UNCACHE: lambda self: self._parse_uncache(), 557 TokenType.UPDATE: lambda self: self._parse_update(), 558 TokenType.USE: lambda self: self.expression( 559 exp.Use, 560 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 561 and exp.var(self._prev.text), 562 this=self._parse_table(schema=False), 563 ), 564 } 565 566 UNARY_PARSERS = { 567 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 568 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 569 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 570 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 571 } 572 573 PRIMARY_PARSERS = { 574 TokenType.STRING: lambda self, token: self.expression( 575 exp.Literal, this=token.text, is_string=True 576 ), 577 TokenType.NUMBER: lambda self, token: self.expression( 578 exp.Literal, this=token.text, is_string=False 579 ), 580 TokenType.STAR: lambda self, _: self.expression( 581 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 582 ), 583 TokenType.NULL: lambda self, _: self.expression(exp.Null), 584 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 585 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 586 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 587 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 588 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 589 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 590 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 591 exp.National, this=token.text 592 ), 593 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 594 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 595 } 596 597 PLACEHOLDER_PARSERS = { 598 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 599 TokenType.PARAMETER: lambda self: self._parse_parameter(), 600 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 601 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 602 else None, 603 } 604 605 RANGE_PARSERS = { 606 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 607 TokenType.GLOB: binary_range_parser(exp.Glob), 608 TokenType.ILIKE: binary_range_parser(exp.ILike), 609 TokenType.IN: lambda self, this: self._parse_in(this), 610 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 611 TokenType.IS: lambda self, this: self._parse_is(this), 612 TokenType.LIKE: binary_range_parser(exp.Like), 613 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 614 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 615 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 616 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 617 } 618 619 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 620 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 621 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 622 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 623 "CHARACTER SET": lambda self: self._parse_character_set(), 624 "CHECKSUM": lambda self: self._parse_checksum(), 625 "CLUSTER BY": lambda self: self._parse_cluster(), 626 "CLUSTERED": lambda self: self._parse_clustered_by(), 627 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 628 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 629 "COPY": lambda self: self._parse_copy_property(), 630 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 631 "DEFINER": lambda self: self._parse_definer(), 632 "DETERMINISTIC": lambda self: self.expression( 633 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 634 ), 635 "DISTKEY": lambda self: self._parse_distkey(), 636 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 637 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 638 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 639 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 640 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 641 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 642 "FREESPACE": lambda self: self._parse_freespace(), 643 "HEAP": lambda self: self.expression(exp.HeapProperty), 644 "IMMUTABLE": lambda self: self.expression( 645 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 646 ), 647 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 648 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 649 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 650 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 651 "LIKE": lambda self: self._parse_create_like(), 652 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 653 "LOCK": lambda self: self._parse_locking(), 654 "LOCKING": lambda self: self._parse_locking(), 655 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 656 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 657 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 658 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 659 "NO": lambda self: self._parse_no_property(), 660 "ON": lambda self: self._parse_on_property(), 661 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 662 "PARTITION BY": lambda self: self._parse_partitioned_by(), 663 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 664 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 665 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 666 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 667 "RETURNS": lambda self: self._parse_returns(), 668 "ROW": lambda self: self._parse_row(), 669 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 670 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 671 "SETTINGS": lambda self: self.expression( 672 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 673 ), 674 "SORTKEY": lambda self: self._parse_sortkey(), 675 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 676 "STABLE": lambda self: self.expression( 677 exp.StabilityProperty, this=exp.Literal.string("STABLE") 678 ), 679 "STORED": lambda self: self._parse_stored(), 680 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 681 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 682 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 683 "TO": lambda self: self._parse_to_table(), 684 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 685 "TTL": lambda self: self._parse_ttl(), 686 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 687 "VOLATILE": lambda self: self._parse_volatile_property(), 688 "WITH": lambda self: self._parse_with_property(), 689 } 690 691 CONSTRAINT_PARSERS = { 692 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 693 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 694 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 695 "CHARACTER SET": lambda self: self.expression( 696 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 697 ), 698 "CHECK": lambda self: self.expression( 699 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 700 ), 701 "COLLATE": lambda self: self.expression( 702 exp.CollateColumnConstraint, this=self._parse_var() 703 ), 704 "COMMENT": lambda self: self.expression( 705 exp.CommentColumnConstraint, this=self._parse_string() 706 ), 707 "COMPRESS": lambda self: self._parse_compress(), 708 "CLUSTERED": lambda self: self.expression( 709 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 710 ), 711 "NONCLUSTERED": lambda self: self.expression( 712 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 713 ), 714 "DEFAULT": lambda self: self.expression( 715 exp.DefaultColumnConstraint, this=self._parse_bitwise() 716 ), 717 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 718 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 719 "FORMAT": lambda self: self.expression( 720 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 721 ), 722 "GENERATED": lambda self: self._parse_generated_as_identity(), 723 "IDENTITY": lambda self: self._parse_auto_increment(), 724 "INLINE": lambda self: self._parse_inline(), 725 "LIKE": lambda self: self._parse_create_like(), 726 "NOT": lambda self: self._parse_not_constraint(), 727 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 728 "ON": lambda self: ( 729 self._match(TokenType.UPDATE) 730 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 731 ) 732 or self.expression(exp.OnProperty, this=self._parse_id_var()), 733 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 734 "PRIMARY KEY": lambda self: self._parse_primary_key(), 735 "REFERENCES": lambda self: self._parse_references(match=False), 736 "TITLE": lambda self: self.expression( 737 exp.TitleColumnConstraint, this=self._parse_var_or_string() 738 ), 739 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 740 "UNIQUE": lambda self: self._parse_unique(), 741 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 742 "WITH": lambda self: self.expression( 743 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 744 ), 745 } 746 747 ALTER_PARSERS = { 748 "ADD": lambda self: self._parse_alter_table_add(), 749 "ALTER": lambda self: self._parse_alter_table_alter(), 750 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 751 "DROP": lambda self: self._parse_alter_table_drop(), 752 "RENAME": lambda self: self._parse_alter_table_rename(), 753 } 754 755 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 756 757 NO_PAREN_FUNCTION_PARSERS = { 758 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 759 "CASE": lambda self: self._parse_case(), 760 "IF": lambda self: self._parse_if(), 761 "NEXT": lambda self: self._parse_next_value_for(), 762 } 763 764 INVALID_FUNC_NAME_TOKENS = { 765 TokenType.IDENTIFIER, 766 TokenType.STRING, 767 } 768 769 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 770 771 FUNCTION_PARSERS = { 772 "ANY_VALUE": lambda self: self._parse_any_value(), 773 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 774 "CONCAT": lambda self: self._parse_concat(), 775 "CONCAT_WS": lambda self: self._parse_concat_ws(), 776 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 777 "DECODE": lambda self: self._parse_decode(), 778 "EXTRACT": lambda self: self._parse_extract(), 779 "JSON_OBJECT": lambda self: self._parse_json_object(), 780 "LOG": lambda self: self._parse_logarithm(), 781 "MATCH": lambda self: self._parse_match_against(), 782 "OPENJSON": lambda self: self._parse_open_json(), 783 "POSITION": lambda self: self._parse_position(), 784 "SAFE_CAST": lambda self: self._parse_cast(False), 785 "STRING_AGG": lambda self: self._parse_string_agg(), 786 "SUBSTRING": lambda self: self._parse_substring(), 787 "TRIM": lambda self: self._parse_trim(), 788 "TRY_CAST": lambda self: self._parse_cast(False), 789 "TRY_CONVERT": lambda self: self._parse_convert(False), 790 } 791 792 QUERY_MODIFIER_PARSERS = { 793 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 794 TokenType.WHERE: lambda self: ("where", self._parse_where()), 795 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 796 TokenType.HAVING: lambda self: ("having", self._parse_having()), 797 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 798 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 799 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 800 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 801 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 802 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 803 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 804 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 805 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 806 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 807 TokenType.CLUSTER_BY: lambda self: ( 808 "cluster", 809 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 810 ), 811 TokenType.DISTRIBUTE_BY: lambda self: ( 812 "distribute", 813 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 814 ), 815 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 816 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 817 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 818 } 819 820 SET_PARSERS = { 821 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 822 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 823 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 824 "TRANSACTION": lambda self: self._parse_set_transaction(), 825 } 826 827 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 828 829 TYPE_LITERAL_PARSERS = { 830 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 831 } 832 833 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 834 835 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 836 837 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 838 839 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 840 TRANSACTION_CHARACTERISTICS = { 841 "ISOLATION LEVEL REPEATABLE READ", 842 "ISOLATION LEVEL READ COMMITTED", 843 "ISOLATION LEVEL READ UNCOMMITTED", 844 "ISOLATION LEVEL SERIALIZABLE", 845 "READ WRITE", 846 "READ ONLY", 847 } 848 849 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 850 851 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 852 853 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 854 855 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 856 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 857 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 858 859 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 860 861 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 862 863 DISTINCT_TOKENS = {TokenType.DISTINCT} 864 865 NULL_TOKENS = {TokenType.NULL} 866 867 STRICT_CAST = True 868 869 # A NULL arg in CONCAT yields NULL by default 870 CONCAT_NULL_OUTPUTS_STRING = False 871 872 PREFIXED_PIVOT_COLUMNS = False 873 IDENTIFY_PIVOT_STRINGS = False 874 875 LOG_BASE_FIRST = True 876 LOG_DEFAULTS_TO_LN = False 877 878 # Whether or not ADD is present for each column added by ALTER TABLE 879 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 880 881 # Whether or not the table sample clause expects CSV syntax 882 TABLESAMPLE_CSV = False 883 884 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments. 885 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 886 887 __slots__ = ( 888 "error_level", 889 "error_message_context", 890 "max_errors", 891 "sql", 892 "errors", 893 "_tokens", 894 "_index", 895 "_curr", 896 "_next", 897 "_prev", 898 "_prev_comments", 899 "_tokenizer", 900 ) 901 902 # Autofilled 903 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 904 INDEX_OFFSET: int = 0 905 UNNEST_COLUMN_ONLY: bool = False 906 ALIAS_POST_TABLESAMPLE: bool = False 907 STRICT_STRING_CONCAT = False 908 SUPPORTS_USER_DEFINED_TYPES = True 909 NORMALIZE_FUNCTIONS = "upper" 910 NULL_ORDERING: str = "nulls_are_small" 911 SHOW_TRIE: t.Dict = {} 912 SET_TRIE: t.Dict = {} 913 FORMAT_MAPPING: t.Dict[str, str] = {} 914 FORMAT_TRIE: t.Dict = {} 915 TIME_MAPPING: t.Dict[str, str] = {} 916 TIME_TRIE: t.Dict = {} 917 918 def __init__( 919 self, 920 error_level: t.Optional[ErrorLevel] = None, 921 error_message_context: int = 100, 922 max_errors: int = 3, 923 ): 924 self.error_level = error_level or ErrorLevel.IMMEDIATE 925 self.error_message_context = error_message_context 926 self.max_errors = max_errors 927 self._tokenizer = self.TOKENIZER_CLASS() 928 self.reset() 929 930 def reset(self): 931 self.sql = "" 932 self.errors = [] 933 self._tokens = [] 934 self._index = 0 935 self._curr = None 936 self._next = None 937 self._prev = None 938 self._prev_comments = None 939 940 def parse( 941 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 942 ) -> t.List[t.Optional[exp.Expression]]: 943 """ 944 Parses a list of tokens and returns a list of syntax trees, one tree 945 per parsed SQL statement. 946 947 Args: 948 raw_tokens: The list of tokens. 949 sql: The original SQL string, used to produce helpful debug messages. 950 951 Returns: 952 The list of the produced syntax trees. 953 """ 954 return self._parse( 955 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 956 ) 957 958 def parse_into( 959 self, 960 expression_types: exp.IntoType, 961 raw_tokens: t.List[Token], 962 sql: t.Optional[str] = None, 963 ) -> t.List[t.Optional[exp.Expression]]: 964 """ 965 Parses a list of tokens into a given Expression type. If a collection of Expression 966 types is given instead, this method will try to parse the token list into each one 967 of them, stopping at the first for which the parsing succeeds. 968 969 Args: 970 expression_types: The expression type(s) to try and parse the token list into. 971 raw_tokens: The list of tokens. 972 sql: The original SQL string, used to produce helpful debug messages. 973 974 Returns: 975 The target Expression. 976 """ 977 errors = [] 978 for expression_type in ensure_list(expression_types): 979 parser = self.EXPRESSION_PARSERS.get(expression_type) 980 if not parser: 981 raise TypeError(f"No parser registered for {expression_type}") 982 983 try: 984 return self._parse(parser, raw_tokens, sql) 985 except ParseError as e: 986 e.errors[0]["into_expression"] = expression_type 987 errors.append(e) 988 989 raise ParseError( 990 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 991 errors=merge_errors(errors), 992 ) from errors[-1] 993 994 def _parse( 995 self, 996 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 997 raw_tokens: t.List[Token], 998 sql: t.Optional[str] = None, 999 ) -> t.List[t.Optional[exp.Expression]]: 1000 self.reset() 1001 self.sql = sql or "" 1002 1003 total = len(raw_tokens) 1004 chunks: t.List[t.List[Token]] = [[]] 1005 1006 for i, token in enumerate(raw_tokens): 1007 if token.token_type == TokenType.SEMICOLON: 1008 if i < total - 1: 1009 chunks.append([]) 1010 else: 1011 chunks[-1].append(token) 1012 1013 expressions = [] 1014 1015 for tokens in chunks: 1016 self._index = -1 1017 self._tokens = tokens 1018 self._advance() 1019 1020 expressions.append(parse_method(self)) 1021 1022 if self._index < len(self._tokens): 1023 self.raise_error("Invalid expression / Unexpected token") 1024 1025 self.check_errors() 1026 1027 return expressions 1028 1029 def check_errors(self) -> None: 1030 """Logs or raises any found errors, depending on the chosen error level setting.""" 1031 if self.error_level == ErrorLevel.WARN: 1032 for error in self.errors: 1033 logger.error(str(error)) 1034 elif self.error_level == ErrorLevel.RAISE and self.errors: 1035 raise ParseError( 1036 concat_messages(self.errors, self.max_errors), 1037 errors=merge_errors(self.errors), 1038 ) 1039 1040 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1041 """ 1042 Appends an error in the list of recorded errors or raises it, depending on the chosen 1043 error level setting. 1044 """ 1045 token = token or self._curr or self._prev or Token.string("") 1046 start = token.start 1047 end = token.end + 1 1048 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1049 highlight = self.sql[start:end] 1050 end_context = self.sql[end : end + self.error_message_context] 1051 1052 error = ParseError.new( 1053 f"{message}. Line {token.line}, Col: {token.col}.\n" 1054 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1055 description=message, 1056 line=token.line, 1057 col=token.col, 1058 start_context=start_context, 1059 highlight=highlight, 1060 end_context=end_context, 1061 ) 1062 1063 if self.error_level == ErrorLevel.IMMEDIATE: 1064 raise error 1065 1066 self.errors.append(error) 1067 1068 def expression( 1069 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1070 ) -> E: 1071 """ 1072 Creates a new, validated Expression. 1073 1074 Args: 1075 exp_class: The expression class to instantiate. 1076 comments: An optional list of comments to attach to the expression. 1077 kwargs: The arguments to set for the expression along with their respective values. 1078 1079 Returns: 1080 The target expression. 1081 """ 1082 instance = exp_class(**kwargs) 1083 instance.add_comments(comments) if comments else self._add_comments(instance) 1084 return self.validate_expression(instance) 1085 1086 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1087 if expression and self._prev_comments: 1088 expression.add_comments(self._prev_comments) 1089 self._prev_comments = None 1090 1091 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1092 """ 1093 Validates an Expression, making sure that all its mandatory arguments are set. 1094 1095 Args: 1096 expression: The expression to validate. 1097 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1098 1099 Returns: 1100 The validated expression. 1101 """ 1102 if self.error_level != ErrorLevel.IGNORE: 1103 for error_message in expression.error_messages(args): 1104 self.raise_error(error_message) 1105 1106 return expression 1107 1108 def _find_sql(self, start: Token, end: Token) -> str: 1109 return self.sql[start.start : end.end + 1] 1110 1111 def _advance(self, times: int = 1) -> None: 1112 self._index += times 1113 self._curr = seq_get(self._tokens, self._index) 1114 self._next = seq_get(self._tokens, self._index + 1) 1115 1116 if self._index > 0: 1117 self._prev = self._tokens[self._index - 1] 1118 self._prev_comments = self._prev.comments 1119 else: 1120 self._prev = None 1121 self._prev_comments = None 1122 1123 def _retreat(self, index: int) -> None: 1124 if index != self._index: 1125 self._advance(index - self._index) 1126 1127 def _parse_command(self) -> exp.Command: 1128 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1129 1130 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1131 start = self._prev 1132 exists = self._parse_exists() if allow_exists else None 1133 1134 self._match(TokenType.ON) 1135 1136 kind = self._match_set(self.CREATABLES) and self._prev 1137 if not kind: 1138 return self._parse_as_command(start) 1139 1140 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1141 this = self._parse_user_defined_function(kind=kind.token_type) 1142 elif kind.token_type == TokenType.TABLE: 1143 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1144 elif kind.token_type == TokenType.COLUMN: 1145 this = self._parse_column() 1146 else: 1147 this = self._parse_id_var() 1148 1149 self._match(TokenType.IS) 1150 1151 return self.expression( 1152 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1153 ) 1154 1155 def _parse_to_table( 1156 self, 1157 ) -> exp.ToTableProperty: 1158 table = self._parse_table_parts(schema=True) 1159 return self.expression(exp.ToTableProperty, this=table) 1160 1161 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1162 def _parse_ttl(self) -> exp.Expression: 1163 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1164 this = self._parse_bitwise() 1165 1166 if self._match_text_seq("DELETE"): 1167 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1168 if self._match_text_seq("RECOMPRESS"): 1169 return self.expression( 1170 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1171 ) 1172 if self._match_text_seq("TO", "DISK"): 1173 return self.expression( 1174 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1175 ) 1176 if self._match_text_seq("TO", "VOLUME"): 1177 return self.expression( 1178 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1179 ) 1180 1181 return this 1182 1183 expressions = self._parse_csv(_parse_ttl_action) 1184 where = self._parse_where() 1185 group = self._parse_group() 1186 1187 aggregates = None 1188 if group and self._match(TokenType.SET): 1189 aggregates = self._parse_csv(self._parse_set_item) 1190 1191 return self.expression( 1192 exp.MergeTreeTTL, 1193 expressions=expressions, 1194 where=where, 1195 group=group, 1196 aggregates=aggregates, 1197 ) 1198 1199 def _parse_statement(self) -> t.Optional[exp.Expression]: 1200 if self._curr is None: 1201 return None 1202 1203 if self._match_set(self.STATEMENT_PARSERS): 1204 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1205 1206 if self._match_set(Tokenizer.COMMANDS): 1207 return self._parse_command() 1208 1209 expression = self._parse_expression() 1210 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1211 return self._parse_query_modifiers(expression) 1212 1213 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1214 start = self._prev 1215 temporary = self._match(TokenType.TEMPORARY) 1216 materialized = self._match_text_seq("MATERIALIZED") 1217 1218 kind = self._match_set(self.CREATABLES) and self._prev.text 1219 if not kind: 1220 return self._parse_as_command(start) 1221 1222 return self.expression( 1223 exp.Drop, 1224 comments=start.comments, 1225 exists=exists or self._parse_exists(), 1226 this=self._parse_table(schema=True), 1227 kind=kind, 1228 temporary=temporary, 1229 materialized=materialized, 1230 cascade=self._match_text_seq("CASCADE"), 1231 constraints=self._match_text_seq("CONSTRAINTS"), 1232 purge=self._match_text_seq("PURGE"), 1233 ) 1234 1235 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1236 return ( 1237 self._match_text_seq("IF") 1238 and (not not_ or self._match(TokenType.NOT)) 1239 and self._match(TokenType.EXISTS) 1240 ) 1241 1242 def _parse_create(self) -> exp.Create | exp.Command: 1243 # Note: this can't be None because we've matched a statement parser 1244 start = self._prev 1245 comments = self._prev_comments 1246 1247 replace = start.text.upper() == "REPLACE" or self._match_pair( 1248 TokenType.OR, TokenType.REPLACE 1249 ) 1250 unique = self._match(TokenType.UNIQUE) 1251 1252 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1253 self._advance() 1254 1255 properties = None 1256 create_token = self._match_set(self.CREATABLES) and self._prev 1257 1258 if not create_token: 1259 # exp.Properties.Location.POST_CREATE 1260 properties = self._parse_properties() 1261 create_token = self._match_set(self.CREATABLES) and self._prev 1262 1263 if not properties or not create_token: 1264 return self._parse_as_command(start) 1265 1266 exists = self._parse_exists(not_=True) 1267 this = None 1268 expression: t.Optional[exp.Expression] = None 1269 indexes = None 1270 no_schema_binding = None 1271 begin = None 1272 clone = None 1273 1274 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1275 nonlocal properties 1276 if properties and temp_props: 1277 properties.expressions.extend(temp_props.expressions) 1278 elif temp_props: 1279 properties = temp_props 1280 1281 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1282 this = self._parse_user_defined_function(kind=create_token.token_type) 1283 1284 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1285 extend_props(self._parse_properties()) 1286 1287 self._match(TokenType.ALIAS) 1288 1289 if self._match(TokenType.COMMAND): 1290 expression = self._parse_as_command(self._prev) 1291 else: 1292 begin = self._match(TokenType.BEGIN) 1293 return_ = self._match_text_seq("RETURN") 1294 1295 if self._match(TokenType.STRING, advance=False): 1296 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1297 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1298 expression = self._parse_string() 1299 extend_props(self._parse_properties()) 1300 else: 1301 expression = self._parse_statement() 1302 1303 if return_: 1304 expression = self.expression(exp.Return, this=expression) 1305 elif create_token.token_type == TokenType.INDEX: 1306 this = self._parse_index(index=self._parse_id_var()) 1307 elif create_token.token_type in self.DB_CREATABLES: 1308 table_parts = self._parse_table_parts(schema=True) 1309 1310 # exp.Properties.Location.POST_NAME 1311 self._match(TokenType.COMMA) 1312 extend_props(self._parse_properties(before=True)) 1313 1314 this = self._parse_schema(this=table_parts) 1315 1316 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1317 extend_props(self._parse_properties()) 1318 1319 self._match(TokenType.ALIAS) 1320 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1321 # exp.Properties.Location.POST_ALIAS 1322 extend_props(self._parse_properties()) 1323 1324 expression = self._parse_ddl_select() 1325 1326 if create_token.token_type == TokenType.TABLE: 1327 # exp.Properties.Location.POST_EXPRESSION 1328 extend_props(self._parse_properties()) 1329 1330 indexes = [] 1331 while True: 1332 index = self._parse_index() 1333 1334 # exp.Properties.Location.POST_INDEX 1335 extend_props(self._parse_properties()) 1336 1337 if not index: 1338 break 1339 else: 1340 self._match(TokenType.COMMA) 1341 indexes.append(index) 1342 elif create_token.token_type == TokenType.VIEW: 1343 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1344 no_schema_binding = True 1345 1346 shallow = self._match_text_seq("SHALLOW") 1347 1348 if self._match_text_seq("CLONE"): 1349 clone = self._parse_table(schema=True) 1350 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1351 clone_kind = ( 1352 self._match(TokenType.L_PAREN) 1353 and self._match_texts(self.CLONE_KINDS) 1354 and self._prev.text.upper() 1355 ) 1356 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1357 self._match(TokenType.R_PAREN) 1358 clone = self.expression( 1359 exp.Clone, 1360 this=clone, 1361 when=when, 1362 kind=clone_kind, 1363 shallow=shallow, 1364 expression=clone_expression, 1365 ) 1366 1367 return self.expression( 1368 exp.Create, 1369 comments=comments, 1370 this=this, 1371 kind=create_token.text, 1372 replace=replace, 1373 unique=unique, 1374 expression=expression, 1375 exists=exists, 1376 properties=properties, 1377 indexes=indexes, 1378 no_schema_binding=no_schema_binding, 1379 begin=begin, 1380 clone=clone, 1381 ) 1382 1383 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1384 # only used for teradata currently 1385 self._match(TokenType.COMMA) 1386 1387 kwargs = { 1388 "no": self._match_text_seq("NO"), 1389 "dual": self._match_text_seq("DUAL"), 1390 "before": self._match_text_seq("BEFORE"), 1391 "default": self._match_text_seq("DEFAULT"), 1392 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1393 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1394 "after": self._match_text_seq("AFTER"), 1395 "minimum": self._match_texts(("MIN", "MINIMUM")), 1396 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1397 } 1398 1399 if self._match_texts(self.PROPERTY_PARSERS): 1400 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1401 try: 1402 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1403 except TypeError: 1404 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1405 1406 return None 1407 1408 def _parse_property(self) -> t.Optional[exp.Expression]: 1409 if self._match_texts(self.PROPERTY_PARSERS): 1410 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1411 1412 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1413 return self._parse_character_set(default=True) 1414 1415 if self._match_text_seq("COMPOUND", "SORTKEY"): 1416 return self._parse_sortkey(compound=True) 1417 1418 if self._match_text_seq("SQL", "SECURITY"): 1419 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1420 1421 index = self._index 1422 key = self._parse_column() 1423 1424 if not self._match(TokenType.EQ): 1425 self._retreat(index) 1426 return None 1427 1428 return self.expression( 1429 exp.Property, 1430 this=key.to_dot() if isinstance(key, exp.Column) else key, 1431 value=self._parse_column() or self._parse_var(any_token=True), 1432 ) 1433 1434 def _parse_stored(self) -> exp.FileFormatProperty: 1435 self._match(TokenType.ALIAS) 1436 1437 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1438 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1439 1440 return self.expression( 1441 exp.FileFormatProperty, 1442 this=self.expression( 1443 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1444 ) 1445 if input_format or output_format 1446 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1447 ) 1448 1449 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1450 self._match(TokenType.EQ) 1451 self._match(TokenType.ALIAS) 1452 return self.expression(exp_class, this=self._parse_field()) 1453 1454 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1455 properties = [] 1456 while True: 1457 if before: 1458 prop = self._parse_property_before() 1459 else: 1460 prop = self._parse_property() 1461 1462 if not prop: 1463 break 1464 for p in ensure_list(prop): 1465 properties.append(p) 1466 1467 if properties: 1468 return self.expression(exp.Properties, expressions=properties) 1469 1470 return None 1471 1472 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1473 return self.expression( 1474 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1475 ) 1476 1477 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1478 if self._index >= 2: 1479 pre_volatile_token = self._tokens[self._index - 2] 1480 else: 1481 pre_volatile_token = None 1482 1483 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1484 return exp.VolatileProperty() 1485 1486 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1487 1488 def _parse_with_property( 1489 self, 1490 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1491 if self._match(TokenType.L_PAREN, advance=False): 1492 return self._parse_wrapped_csv(self._parse_property) 1493 1494 if self._match_text_seq("JOURNAL"): 1495 return self._parse_withjournaltable() 1496 1497 if self._match_text_seq("DATA"): 1498 return self._parse_withdata(no=False) 1499 elif self._match_text_seq("NO", "DATA"): 1500 return self._parse_withdata(no=True) 1501 1502 if not self._next: 1503 return None 1504 1505 return self._parse_withisolatedloading() 1506 1507 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1508 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1509 self._match(TokenType.EQ) 1510 1511 user = self._parse_id_var() 1512 self._match(TokenType.PARAMETER) 1513 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1514 1515 if not user or not host: 1516 return None 1517 1518 return exp.DefinerProperty(this=f"{user}@{host}") 1519 1520 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1521 self._match(TokenType.TABLE) 1522 self._match(TokenType.EQ) 1523 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1524 1525 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1526 return self.expression(exp.LogProperty, no=no) 1527 1528 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1529 return self.expression(exp.JournalProperty, **kwargs) 1530 1531 def _parse_checksum(self) -> exp.ChecksumProperty: 1532 self._match(TokenType.EQ) 1533 1534 on = None 1535 if self._match(TokenType.ON): 1536 on = True 1537 elif self._match_text_seq("OFF"): 1538 on = False 1539 1540 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1541 1542 def _parse_cluster(self) -> exp.Cluster: 1543 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1544 1545 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1546 self._match_text_seq("BY") 1547 1548 self._match_l_paren() 1549 expressions = self._parse_csv(self._parse_column) 1550 self._match_r_paren() 1551 1552 if self._match_text_seq("SORTED", "BY"): 1553 self._match_l_paren() 1554 sorted_by = self._parse_csv(self._parse_ordered) 1555 self._match_r_paren() 1556 else: 1557 sorted_by = None 1558 1559 self._match(TokenType.INTO) 1560 buckets = self._parse_number() 1561 self._match_text_seq("BUCKETS") 1562 1563 return self.expression( 1564 exp.ClusteredByProperty, 1565 expressions=expressions, 1566 sorted_by=sorted_by, 1567 buckets=buckets, 1568 ) 1569 1570 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1571 if not self._match_text_seq("GRANTS"): 1572 self._retreat(self._index - 1) 1573 return None 1574 1575 return self.expression(exp.CopyGrantsProperty) 1576 1577 def _parse_freespace(self) -> exp.FreespaceProperty: 1578 self._match(TokenType.EQ) 1579 return self.expression( 1580 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1581 ) 1582 1583 def _parse_mergeblockratio( 1584 self, no: bool = False, default: bool = False 1585 ) -> exp.MergeBlockRatioProperty: 1586 if self._match(TokenType.EQ): 1587 return self.expression( 1588 exp.MergeBlockRatioProperty, 1589 this=self._parse_number(), 1590 percent=self._match(TokenType.PERCENT), 1591 ) 1592 1593 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1594 1595 def _parse_datablocksize( 1596 self, 1597 default: t.Optional[bool] = None, 1598 minimum: t.Optional[bool] = None, 1599 maximum: t.Optional[bool] = None, 1600 ) -> exp.DataBlocksizeProperty: 1601 self._match(TokenType.EQ) 1602 size = self._parse_number() 1603 1604 units = None 1605 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1606 units = self._prev.text 1607 1608 return self.expression( 1609 exp.DataBlocksizeProperty, 1610 size=size, 1611 units=units, 1612 default=default, 1613 minimum=minimum, 1614 maximum=maximum, 1615 ) 1616 1617 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1618 self._match(TokenType.EQ) 1619 always = self._match_text_seq("ALWAYS") 1620 manual = self._match_text_seq("MANUAL") 1621 never = self._match_text_seq("NEVER") 1622 default = self._match_text_seq("DEFAULT") 1623 1624 autotemp = None 1625 if self._match_text_seq("AUTOTEMP"): 1626 autotemp = self._parse_schema() 1627 1628 return self.expression( 1629 exp.BlockCompressionProperty, 1630 always=always, 1631 manual=manual, 1632 never=never, 1633 default=default, 1634 autotemp=autotemp, 1635 ) 1636 1637 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1638 no = self._match_text_seq("NO") 1639 concurrent = self._match_text_seq("CONCURRENT") 1640 self._match_text_seq("ISOLATED", "LOADING") 1641 for_all = self._match_text_seq("FOR", "ALL") 1642 for_insert = self._match_text_seq("FOR", "INSERT") 1643 for_none = self._match_text_seq("FOR", "NONE") 1644 return self.expression( 1645 exp.IsolatedLoadingProperty, 1646 no=no, 1647 concurrent=concurrent, 1648 for_all=for_all, 1649 for_insert=for_insert, 1650 for_none=for_none, 1651 ) 1652 1653 def _parse_locking(self) -> exp.LockingProperty: 1654 if self._match(TokenType.TABLE): 1655 kind = "TABLE" 1656 elif self._match(TokenType.VIEW): 1657 kind = "VIEW" 1658 elif self._match(TokenType.ROW): 1659 kind = "ROW" 1660 elif self._match_text_seq("DATABASE"): 1661 kind = "DATABASE" 1662 else: 1663 kind = None 1664 1665 if kind in ("DATABASE", "TABLE", "VIEW"): 1666 this = self._parse_table_parts() 1667 else: 1668 this = None 1669 1670 if self._match(TokenType.FOR): 1671 for_or_in = "FOR" 1672 elif self._match(TokenType.IN): 1673 for_or_in = "IN" 1674 else: 1675 for_or_in = None 1676 1677 if self._match_text_seq("ACCESS"): 1678 lock_type = "ACCESS" 1679 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1680 lock_type = "EXCLUSIVE" 1681 elif self._match_text_seq("SHARE"): 1682 lock_type = "SHARE" 1683 elif self._match_text_seq("READ"): 1684 lock_type = "READ" 1685 elif self._match_text_seq("WRITE"): 1686 lock_type = "WRITE" 1687 elif self._match_text_seq("CHECKSUM"): 1688 lock_type = "CHECKSUM" 1689 else: 1690 lock_type = None 1691 1692 override = self._match_text_seq("OVERRIDE") 1693 1694 return self.expression( 1695 exp.LockingProperty, 1696 this=this, 1697 kind=kind, 1698 for_or_in=for_or_in, 1699 lock_type=lock_type, 1700 override=override, 1701 ) 1702 1703 def _parse_partition_by(self) -> t.List[exp.Expression]: 1704 if self._match(TokenType.PARTITION_BY): 1705 return self._parse_csv(self._parse_conjunction) 1706 return [] 1707 1708 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1709 self._match(TokenType.EQ) 1710 return self.expression( 1711 exp.PartitionedByProperty, 1712 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1713 ) 1714 1715 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1716 if self._match_text_seq("AND", "STATISTICS"): 1717 statistics = True 1718 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1719 statistics = False 1720 else: 1721 statistics = None 1722 1723 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1724 1725 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1726 if self._match_text_seq("PRIMARY", "INDEX"): 1727 return exp.NoPrimaryIndexProperty() 1728 return None 1729 1730 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1731 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1732 return exp.OnCommitProperty() 1733 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1734 return exp.OnCommitProperty(delete=True) 1735 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1736 1737 def _parse_distkey(self) -> exp.DistKeyProperty: 1738 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1739 1740 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1741 table = self._parse_table(schema=True) 1742 1743 options = [] 1744 while self._match_texts(("INCLUDING", "EXCLUDING")): 1745 this = self._prev.text.upper() 1746 1747 id_var = self._parse_id_var() 1748 if not id_var: 1749 return None 1750 1751 options.append( 1752 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1753 ) 1754 1755 return self.expression(exp.LikeProperty, this=table, expressions=options) 1756 1757 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1758 return self.expression( 1759 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1760 ) 1761 1762 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1763 self._match(TokenType.EQ) 1764 return self.expression( 1765 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1766 ) 1767 1768 def _parse_returns(self) -> exp.ReturnsProperty: 1769 value: t.Optional[exp.Expression] 1770 is_table = self._match(TokenType.TABLE) 1771 1772 if is_table: 1773 if self._match(TokenType.LT): 1774 value = self.expression( 1775 exp.Schema, 1776 this="TABLE", 1777 expressions=self._parse_csv(self._parse_struct_types), 1778 ) 1779 if not self._match(TokenType.GT): 1780 self.raise_error("Expecting >") 1781 else: 1782 value = self._parse_schema(exp.var("TABLE")) 1783 else: 1784 value = self._parse_types() 1785 1786 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1787 1788 def _parse_describe(self) -> exp.Describe: 1789 kind = self._match_set(self.CREATABLES) and self._prev.text 1790 this = self._parse_table(schema=True) 1791 properties = self._parse_properties() 1792 expressions = properties.expressions if properties else None 1793 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1794 1795 def _parse_insert(self) -> exp.Insert: 1796 comments = ensure_list(self._prev_comments) 1797 overwrite = self._match(TokenType.OVERWRITE) 1798 ignore = self._match(TokenType.IGNORE) 1799 local = self._match_text_seq("LOCAL") 1800 alternative = None 1801 1802 if self._match_text_seq("DIRECTORY"): 1803 this: t.Optional[exp.Expression] = self.expression( 1804 exp.Directory, 1805 this=self._parse_var_or_string(), 1806 local=local, 1807 row_format=self._parse_row_format(match_row=True), 1808 ) 1809 else: 1810 if self._match(TokenType.OR): 1811 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1812 1813 self._match(TokenType.INTO) 1814 comments += ensure_list(self._prev_comments) 1815 self._match(TokenType.TABLE) 1816 this = self._parse_table(schema=True) 1817 1818 returning = self._parse_returning() 1819 1820 return self.expression( 1821 exp.Insert, 1822 comments=comments, 1823 this=this, 1824 by_name=self._match_text_seq("BY", "NAME"), 1825 exists=self._parse_exists(), 1826 partition=self._parse_partition(), 1827 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1828 and self._parse_conjunction(), 1829 expression=self._parse_ddl_select(), 1830 conflict=self._parse_on_conflict(), 1831 returning=returning or self._parse_returning(), 1832 overwrite=overwrite, 1833 alternative=alternative, 1834 ignore=ignore, 1835 ) 1836 1837 def _parse_kill(self) -> exp.Kill: 1838 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1839 1840 return self.expression( 1841 exp.Kill, 1842 this=self._parse_primary(), 1843 kind=kind, 1844 ) 1845 1846 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1847 conflict = self._match_text_seq("ON", "CONFLICT") 1848 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1849 1850 if not conflict and not duplicate: 1851 return None 1852 1853 nothing = None 1854 expressions = None 1855 key = None 1856 constraint = None 1857 1858 if conflict: 1859 if self._match_text_seq("ON", "CONSTRAINT"): 1860 constraint = self._parse_id_var() 1861 else: 1862 key = self._parse_csv(self._parse_value) 1863 1864 self._match_text_seq("DO") 1865 if self._match_text_seq("NOTHING"): 1866 nothing = True 1867 else: 1868 self._match(TokenType.UPDATE) 1869 self._match(TokenType.SET) 1870 expressions = self._parse_csv(self._parse_equality) 1871 1872 return self.expression( 1873 exp.OnConflict, 1874 duplicate=duplicate, 1875 expressions=expressions, 1876 nothing=nothing, 1877 key=key, 1878 constraint=constraint, 1879 ) 1880 1881 def _parse_returning(self) -> t.Optional[exp.Returning]: 1882 if not self._match(TokenType.RETURNING): 1883 return None 1884 return self.expression( 1885 exp.Returning, 1886 expressions=self._parse_csv(self._parse_expression), 1887 into=self._match(TokenType.INTO) and self._parse_table_part(), 1888 ) 1889 1890 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1891 if not self._match(TokenType.FORMAT): 1892 return None 1893 return self._parse_row_format() 1894 1895 def _parse_row_format( 1896 self, match_row: bool = False 1897 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1898 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1899 return None 1900 1901 if self._match_text_seq("SERDE"): 1902 this = self._parse_string() 1903 1904 serde_properties = None 1905 if self._match(TokenType.SERDE_PROPERTIES): 1906 serde_properties = self.expression( 1907 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1908 ) 1909 1910 return self.expression( 1911 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1912 ) 1913 1914 self._match_text_seq("DELIMITED") 1915 1916 kwargs = {} 1917 1918 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1919 kwargs["fields"] = self._parse_string() 1920 if self._match_text_seq("ESCAPED", "BY"): 1921 kwargs["escaped"] = self._parse_string() 1922 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1923 kwargs["collection_items"] = self._parse_string() 1924 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1925 kwargs["map_keys"] = self._parse_string() 1926 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1927 kwargs["lines"] = self._parse_string() 1928 if self._match_text_seq("NULL", "DEFINED", "AS"): 1929 kwargs["null"] = self._parse_string() 1930 1931 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1932 1933 def _parse_load(self) -> exp.LoadData | exp.Command: 1934 if self._match_text_seq("DATA"): 1935 local = self._match_text_seq("LOCAL") 1936 self._match_text_seq("INPATH") 1937 inpath = self._parse_string() 1938 overwrite = self._match(TokenType.OVERWRITE) 1939 self._match_pair(TokenType.INTO, TokenType.TABLE) 1940 1941 return self.expression( 1942 exp.LoadData, 1943 this=self._parse_table(schema=True), 1944 local=local, 1945 overwrite=overwrite, 1946 inpath=inpath, 1947 partition=self._parse_partition(), 1948 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1949 serde=self._match_text_seq("SERDE") and self._parse_string(), 1950 ) 1951 return self._parse_as_command(self._prev) 1952 1953 def _parse_delete(self) -> exp.Delete: 1954 # This handles MySQL's "Multiple-Table Syntax" 1955 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1956 tables = None 1957 comments = self._prev_comments 1958 if not self._match(TokenType.FROM, advance=False): 1959 tables = self._parse_csv(self._parse_table) or None 1960 1961 returning = self._parse_returning() 1962 1963 return self.expression( 1964 exp.Delete, 1965 comments=comments, 1966 tables=tables, 1967 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1968 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1969 where=self._parse_where(), 1970 returning=returning or self._parse_returning(), 1971 limit=self._parse_limit(), 1972 ) 1973 1974 def _parse_update(self) -> exp.Update: 1975 comments = self._prev_comments 1976 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 1977 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1978 returning = self._parse_returning() 1979 return self.expression( 1980 exp.Update, 1981 comments=comments, 1982 **{ # type: ignore 1983 "this": this, 1984 "expressions": expressions, 1985 "from": self._parse_from(joins=True), 1986 "where": self._parse_where(), 1987 "returning": returning or self._parse_returning(), 1988 "order": self._parse_order(), 1989 "limit": self._parse_limit(), 1990 }, 1991 ) 1992 1993 def _parse_uncache(self) -> exp.Uncache: 1994 if not self._match(TokenType.TABLE): 1995 self.raise_error("Expecting TABLE after UNCACHE") 1996 1997 return self.expression( 1998 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1999 ) 2000 2001 def _parse_cache(self) -> exp.Cache: 2002 lazy = self._match_text_seq("LAZY") 2003 self._match(TokenType.TABLE) 2004 table = self._parse_table(schema=True) 2005 2006 options = [] 2007 if self._match_text_seq("OPTIONS"): 2008 self._match_l_paren() 2009 k = self._parse_string() 2010 self._match(TokenType.EQ) 2011 v = self._parse_string() 2012 options = [k, v] 2013 self._match_r_paren() 2014 2015 self._match(TokenType.ALIAS) 2016 return self.expression( 2017 exp.Cache, 2018 this=table, 2019 lazy=lazy, 2020 options=options, 2021 expression=self._parse_select(nested=True), 2022 ) 2023 2024 def _parse_partition(self) -> t.Optional[exp.Partition]: 2025 if not self._match(TokenType.PARTITION): 2026 return None 2027 2028 return self.expression( 2029 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2030 ) 2031 2032 def _parse_value(self) -> exp.Tuple: 2033 if self._match(TokenType.L_PAREN): 2034 expressions = self._parse_csv(self._parse_conjunction) 2035 self._match_r_paren() 2036 return self.expression(exp.Tuple, expressions=expressions) 2037 2038 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2039 # https://prestodb.io/docs/current/sql/values.html 2040 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2041 2042 def _parse_projections(self) -> t.List[exp.Expression]: 2043 return self._parse_expressions() 2044 2045 def _parse_select( 2046 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2047 ) -> t.Optional[exp.Expression]: 2048 cte = self._parse_with() 2049 2050 if cte: 2051 this = self._parse_statement() 2052 2053 if not this: 2054 self.raise_error("Failed to parse any statement following CTE") 2055 return cte 2056 2057 if "with" in this.arg_types: 2058 this.set("with", cte) 2059 else: 2060 self.raise_error(f"{this.key} does not support CTE") 2061 this = cte 2062 2063 return this 2064 2065 # duckdb supports leading with FROM x 2066 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2067 2068 if self._match(TokenType.SELECT): 2069 comments = self._prev_comments 2070 2071 hint = self._parse_hint() 2072 all_ = self._match(TokenType.ALL) 2073 distinct = self._match_set(self.DISTINCT_TOKENS) 2074 2075 kind = ( 2076 self._match(TokenType.ALIAS) 2077 and self._match_texts(("STRUCT", "VALUE")) 2078 and self._prev.text 2079 ) 2080 2081 if distinct: 2082 distinct = self.expression( 2083 exp.Distinct, 2084 on=self._parse_value() if self._match(TokenType.ON) else None, 2085 ) 2086 2087 if all_ and distinct: 2088 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2089 2090 limit = self._parse_limit(top=True) 2091 projections = self._parse_projections() 2092 2093 this = self.expression( 2094 exp.Select, 2095 kind=kind, 2096 hint=hint, 2097 distinct=distinct, 2098 expressions=projections, 2099 limit=limit, 2100 ) 2101 this.comments = comments 2102 2103 into = self._parse_into() 2104 if into: 2105 this.set("into", into) 2106 2107 if not from_: 2108 from_ = self._parse_from() 2109 2110 if from_: 2111 this.set("from", from_) 2112 2113 this = self._parse_query_modifiers(this) 2114 elif (table or nested) and self._match(TokenType.L_PAREN): 2115 if self._match(TokenType.PIVOT): 2116 this = self._parse_simplified_pivot() 2117 elif self._match(TokenType.FROM): 2118 this = exp.select("*").from_( 2119 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2120 ) 2121 else: 2122 this = self._parse_table() if table else self._parse_select(nested=True) 2123 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2124 2125 self._match_r_paren() 2126 2127 # We return early here so that the UNION isn't attached to the subquery by the 2128 # following call to _parse_set_operations, but instead becomes the parent node 2129 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2130 elif self._match(TokenType.VALUES): 2131 this = self.expression( 2132 exp.Values, 2133 expressions=self._parse_csv(self._parse_value), 2134 alias=self._parse_table_alias(), 2135 ) 2136 elif from_: 2137 this = exp.select("*").from_(from_.this, copy=False) 2138 else: 2139 this = None 2140 2141 return self._parse_set_operations(this) 2142 2143 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2144 if not skip_with_token and not self._match(TokenType.WITH): 2145 return None 2146 2147 comments = self._prev_comments 2148 recursive = self._match(TokenType.RECURSIVE) 2149 2150 expressions = [] 2151 while True: 2152 expressions.append(self._parse_cte()) 2153 2154 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2155 break 2156 else: 2157 self._match(TokenType.WITH) 2158 2159 return self.expression( 2160 exp.With, comments=comments, expressions=expressions, recursive=recursive 2161 ) 2162 2163 def _parse_cte(self) -> exp.CTE: 2164 alias = self._parse_table_alias() 2165 if not alias or not alias.this: 2166 self.raise_error("Expected CTE to have alias") 2167 2168 self._match(TokenType.ALIAS) 2169 return self.expression( 2170 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2171 ) 2172 2173 def _parse_table_alias( 2174 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2175 ) -> t.Optional[exp.TableAlias]: 2176 any_token = self._match(TokenType.ALIAS) 2177 alias = ( 2178 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2179 or self._parse_string_as_identifier() 2180 ) 2181 2182 index = self._index 2183 if self._match(TokenType.L_PAREN): 2184 columns = self._parse_csv(self._parse_function_parameter) 2185 self._match_r_paren() if columns else self._retreat(index) 2186 else: 2187 columns = None 2188 2189 if not alias and not columns: 2190 return None 2191 2192 return self.expression(exp.TableAlias, this=alias, columns=columns) 2193 2194 def _parse_subquery( 2195 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2196 ) -> t.Optional[exp.Subquery]: 2197 if not this: 2198 return None 2199 2200 return self.expression( 2201 exp.Subquery, 2202 this=this, 2203 pivots=self._parse_pivots(), 2204 alias=self._parse_table_alias() if parse_alias else None, 2205 ) 2206 2207 def _parse_query_modifiers( 2208 self, this: t.Optional[exp.Expression] 2209 ) -> t.Optional[exp.Expression]: 2210 if isinstance(this, self.MODIFIABLES): 2211 for join in iter(self._parse_join, None): 2212 this.append("joins", join) 2213 for lateral in iter(self._parse_lateral, None): 2214 this.append("laterals", lateral) 2215 2216 while True: 2217 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2218 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2219 key, expression = parser(self) 2220 2221 if expression: 2222 this.set(key, expression) 2223 if key == "limit": 2224 offset = expression.args.pop("offset", None) 2225 if offset: 2226 this.set("offset", exp.Offset(expression=offset)) 2227 continue 2228 break 2229 return this 2230 2231 def _parse_hint(self) -> t.Optional[exp.Hint]: 2232 if self._match(TokenType.HINT): 2233 hints = [] 2234 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2235 hints.extend(hint) 2236 2237 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2238 self.raise_error("Expected */ after HINT") 2239 2240 return self.expression(exp.Hint, expressions=hints) 2241 2242 return None 2243 2244 def _parse_into(self) -> t.Optional[exp.Into]: 2245 if not self._match(TokenType.INTO): 2246 return None 2247 2248 temp = self._match(TokenType.TEMPORARY) 2249 unlogged = self._match_text_seq("UNLOGGED") 2250 self._match(TokenType.TABLE) 2251 2252 return self.expression( 2253 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2254 ) 2255 2256 def _parse_from( 2257 self, joins: bool = False, skip_from_token: bool = False 2258 ) -> t.Optional[exp.From]: 2259 if not skip_from_token and not self._match(TokenType.FROM): 2260 return None 2261 2262 return self.expression( 2263 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2264 ) 2265 2266 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2267 if not self._match(TokenType.MATCH_RECOGNIZE): 2268 return None 2269 2270 self._match_l_paren() 2271 2272 partition = self._parse_partition_by() 2273 order = self._parse_order() 2274 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2275 2276 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2277 rows = exp.var("ONE ROW PER MATCH") 2278 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2279 text = "ALL ROWS PER MATCH" 2280 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2281 text += f" SHOW EMPTY MATCHES" 2282 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2283 text += f" OMIT EMPTY MATCHES" 2284 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2285 text += f" WITH UNMATCHED ROWS" 2286 rows = exp.var(text) 2287 else: 2288 rows = None 2289 2290 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2291 text = "AFTER MATCH SKIP" 2292 if self._match_text_seq("PAST", "LAST", "ROW"): 2293 text += f" PAST LAST ROW" 2294 elif self._match_text_seq("TO", "NEXT", "ROW"): 2295 text += f" TO NEXT ROW" 2296 elif self._match_text_seq("TO", "FIRST"): 2297 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2298 elif self._match_text_seq("TO", "LAST"): 2299 text += f" TO LAST {self._advance_any().text}" # type: ignore 2300 after = exp.var(text) 2301 else: 2302 after = None 2303 2304 if self._match_text_seq("PATTERN"): 2305 self._match_l_paren() 2306 2307 if not self._curr: 2308 self.raise_error("Expecting )", self._curr) 2309 2310 paren = 1 2311 start = self._curr 2312 2313 while self._curr and paren > 0: 2314 if self._curr.token_type == TokenType.L_PAREN: 2315 paren += 1 2316 if self._curr.token_type == TokenType.R_PAREN: 2317 paren -= 1 2318 2319 end = self._prev 2320 self._advance() 2321 2322 if paren > 0: 2323 self.raise_error("Expecting )", self._curr) 2324 2325 pattern = exp.var(self._find_sql(start, end)) 2326 else: 2327 pattern = None 2328 2329 define = ( 2330 self._parse_csv( 2331 lambda: self.expression( 2332 exp.Alias, 2333 alias=self._parse_id_var(any_token=True), 2334 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2335 ) 2336 ) 2337 if self._match_text_seq("DEFINE") 2338 else None 2339 ) 2340 2341 self._match_r_paren() 2342 2343 return self.expression( 2344 exp.MatchRecognize, 2345 partition_by=partition, 2346 order=order, 2347 measures=measures, 2348 rows=rows, 2349 after=after, 2350 pattern=pattern, 2351 define=define, 2352 alias=self._parse_table_alias(), 2353 ) 2354 2355 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2356 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2357 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2358 2359 if outer_apply or cross_apply: 2360 this = self._parse_select(table=True) 2361 view = None 2362 outer = not cross_apply 2363 elif self._match(TokenType.LATERAL): 2364 this = self._parse_select(table=True) 2365 view = self._match(TokenType.VIEW) 2366 outer = self._match(TokenType.OUTER) 2367 else: 2368 return None 2369 2370 if not this: 2371 this = ( 2372 self._parse_unnest() 2373 or self._parse_function() 2374 or self._parse_id_var(any_token=False) 2375 ) 2376 2377 while self._match(TokenType.DOT): 2378 this = exp.Dot( 2379 this=this, 2380 expression=self._parse_function() or self._parse_id_var(any_token=False), 2381 ) 2382 2383 if view: 2384 table = self._parse_id_var(any_token=False) 2385 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2386 table_alias: t.Optional[exp.TableAlias] = self.expression( 2387 exp.TableAlias, this=table, columns=columns 2388 ) 2389 elif isinstance(this, exp.Subquery) and this.alias: 2390 # Ensures parity between the Subquery's and the Lateral's "alias" args 2391 table_alias = this.args["alias"].copy() 2392 else: 2393 table_alias = self._parse_table_alias() 2394 2395 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2396 2397 def _parse_join_parts( 2398 self, 2399 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2400 return ( 2401 self._match_set(self.JOIN_METHODS) and self._prev, 2402 self._match_set(self.JOIN_SIDES) and self._prev, 2403 self._match_set(self.JOIN_KINDS) and self._prev, 2404 ) 2405 2406 def _parse_join( 2407 self, skip_join_token: bool = False, parse_bracket: bool = False 2408 ) -> t.Optional[exp.Join]: 2409 if self._match(TokenType.COMMA): 2410 return self.expression(exp.Join, this=self._parse_table()) 2411 2412 index = self._index 2413 method, side, kind = self._parse_join_parts() 2414 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2415 join = self._match(TokenType.JOIN) 2416 2417 if not skip_join_token and not join: 2418 self._retreat(index) 2419 kind = None 2420 method = None 2421 side = None 2422 2423 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2424 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2425 2426 if not skip_join_token and not join and not outer_apply and not cross_apply: 2427 return None 2428 2429 if outer_apply: 2430 side = Token(TokenType.LEFT, "LEFT") 2431 2432 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2433 2434 if method: 2435 kwargs["method"] = method.text 2436 if side: 2437 kwargs["side"] = side.text 2438 if kind: 2439 kwargs["kind"] = kind.text 2440 if hint: 2441 kwargs["hint"] = hint 2442 2443 if self._match(TokenType.ON): 2444 kwargs["on"] = self._parse_conjunction() 2445 elif self._match(TokenType.USING): 2446 kwargs["using"] = self._parse_wrapped_id_vars() 2447 elif not (kind and kind.token_type == TokenType.CROSS): 2448 index = self._index 2449 joins = self._parse_joins() 2450 2451 if joins and self._match(TokenType.ON): 2452 kwargs["on"] = self._parse_conjunction() 2453 elif joins and self._match(TokenType.USING): 2454 kwargs["using"] = self._parse_wrapped_id_vars() 2455 else: 2456 joins = None 2457 self._retreat(index) 2458 2459 kwargs["this"].set("joins", joins) 2460 2461 comments = [c for token in (method, side, kind) if token for c in token.comments] 2462 return self.expression(exp.Join, comments=comments, **kwargs) 2463 2464 def _parse_index( 2465 self, 2466 index: t.Optional[exp.Expression] = None, 2467 ) -> t.Optional[exp.Index]: 2468 if index: 2469 unique = None 2470 primary = None 2471 amp = None 2472 2473 self._match(TokenType.ON) 2474 self._match(TokenType.TABLE) # hive 2475 table = self._parse_table_parts(schema=True) 2476 else: 2477 unique = self._match(TokenType.UNIQUE) 2478 primary = self._match_text_seq("PRIMARY") 2479 amp = self._match_text_seq("AMP") 2480 2481 if not self._match(TokenType.INDEX): 2482 return None 2483 2484 index = self._parse_id_var() 2485 table = None 2486 2487 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2488 2489 if self._match(TokenType.L_PAREN, advance=False): 2490 columns = self._parse_wrapped_csv(self._parse_ordered) 2491 else: 2492 columns = None 2493 2494 return self.expression( 2495 exp.Index, 2496 this=index, 2497 table=table, 2498 using=using, 2499 columns=columns, 2500 unique=unique, 2501 primary=primary, 2502 amp=amp, 2503 partition_by=self._parse_partition_by(), 2504 where=self._parse_where(), 2505 ) 2506 2507 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2508 hints: t.List[exp.Expression] = [] 2509 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2510 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2511 hints.append( 2512 self.expression( 2513 exp.WithTableHint, 2514 expressions=self._parse_csv( 2515 lambda: self._parse_function() or self._parse_var(any_token=True) 2516 ), 2517 ) 2518 ) 2519 self._match_r_paren() 2520 else: 2521 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2522 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2523 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2524 2525 self._match_texts({"INDEX", "KEY"}) 2526 if self._match(TokenType.FOR): 2527 hint.set("target", self._advance_any() and self._prev.text.upper()) 2528 2529 hint.set("expressions", self._parse_wrapped_id_vars()) 2530 hints.append(hint) 2531 2532 return hints or None 2533 2534 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2535 return ( 2536 (not schema and self._parse_function(optional_parens=False)) 2537 or self._parse_id_var(any_token=False) 2538 or self._parse_string_as_identifier() 2539 or self._parse_placeholder() 2540 ) 2541 2542 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2543 catalog = None 2544 db = None 2545 table = self._parse_table_part(schema=schema) 2546 2547 while self._match(TokenType.DOT): 2548 if catalog: 2549 # This allows nesting the table in arbitrarily many dot expressions if needed 2550 table = self.expression( 2551 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2552 ) 2553 else: 2554 catalog = db 2555 db = table 2556 table = self._parse_table_part(schema=schema) 2557 2558 if not table: 2559 self.raise_error(f"Expected table name but got {self._curr}") 2560 2561 return self.expression( 2562 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2563 ) 2564 2565 def _parse_table( 2566 self, 2567 schema: bool = False, 2568 joins: bool = False, 2569 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2570 parse_bracket: bool = False, 2571 ) -> t.Optional[exp.Expression]: 2572 lateral = self._parse_lateral() 2573 if lateral: 2574 return lateral 2575 2576 unnest = self._parse_unnest() 2577 if unnest: 2578 return unnest 2579 2580 values = self._parse_derived_table_values() 2581 if values: 2582 return values 2583 2584 subquery = self._parse_select(table=True) 2585 if subquery: 2586 if not subquery.args.get("pivots"): 2587 subquery.set("pivots", self._parse_pivots()) 2588 return subquery 2589 2590 bracket = parse_bracket and self._parse_bracket(None) 2591 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2592 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2593 2594 if schema: 2595 return self._parse_schema(this=this) 2596 2597 version = self._parse_version() 2598 2599 if version: 2600 this.set("version", version) 2601 2602 if self.ALIAS_POST_TABLESAMPLE: 2603 table_sample = self._parse_table_sample() 2604 2605 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2606 if alias: 2607 this.set("alias", alias) 2608 2609 this.set("hints", self._parse_table_hints()) 2610 2611 if not this.args.get("pivots"): 2612 this.set("pivots", self._parse_pivots()) 2613 2614 if not self.ALIAS_POST_TABLESAMPLE: 2615 table_sample = self._parse_table_sample() 2616 2617 if table_sample: 2618 table_sample.set("this", this) 2619 this = table_sample 2620 2621 if joins: 2622 for join in iter(self._parse_join, None): 2623 this.append("joins", join) 2624 2625 return this 2626 2627 def _parse_version(self) -> t.Optional[exp.Version]: 2628 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2629 this = "TIMESTAMP" 2630 elif self._match(TokenType.VERSION_SNAPSHOT): 2631 this = "VERSION" 2632 else: 2633 return None 2634 2635 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2636 kind = self._prev.text.upper() 2637 start = self._parse_bitwise() 2638 self._match_texts(("TO", "AND")) 2639 end = self._parse_bitwise() 2640 expression: t.Optional[exp.Expression] = self.expression( 2641 exp.Tuple, expressions=[start, end] 2642 ) 2643 elif self._match_text_seq("CONTAINED", "IN"): 2644 kind = "CONTAINED IN" 2645 expression = self.expression( 2646 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2647 ) 2648 elif self._match(TokenType.ALL): 2649 kind = "ALL" 2650 expression = None 2651 else: 2652 self._match_text_seq("AS", "OF") 2653 kind = "AS OF" 2654 expression = self._parse_type() 2655 2656 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2657 2658 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2659 if not self._match(TokenType.UNNEST): 2660 return None 2661 2662 expressions = self._parse_wrapped_csv(self._parse_type) 2663 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2664 2665 alias = self._parse_table_alias() if with_alias else None 2666 2667 if alias: 2668 if self.UNNEST_COLUMN_ONLY: 2669 if alias.args.get("columns"): 2670 self.raise_error("Unexpected extra column alias in unnest.") 2671 2672 alias.set("columns", [alias.this]) 2673 alias.set("this", None) 2674 2675 columns = alias.args.get("columns") or [] 2676 if offset and len(expressions) < len(columns): 2677 offset = columns.pop() 2678 2679 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2680 self._match(TokenType.ALIAS) 2681 offset = self._parse_id_var() or exp.to_identifier("offset") 2682 2683 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2684 2685 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2686 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2687 if not is_derived and not self._match(TokenType.VALUES): 2688 return None 2689 2690 expressions = self._parse_csv(self._parse_value) 2691 alias = self._parse_table_alias() 2692 2693 if is_derived: 2694 self._match_r_paren() 2695 2696 return self.expression( 2697 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2698 ) 2699 2700 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2701 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2702 as_modifier and self._match_text_seq("USING", "SAMPLE") 2703 ): 2704 return None 2705 2706 bucket_numerator = None 2707 bucket_denominator = None 2708 bucket_field = None 2709 percent = None 2710 rows = None 2711 size = None 2712 seed = None 2713 2714 kind = ( 2715 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2716 ) 2717 method = self._parse_var(tokens=(TokenType.ROW,)) 2718 2719 self._match(TokenType.L_PAREN) 2720 2721 if self.TABLESAMPLE_CSV: 2722 num = None 2723 expressions = self._parse_csv(self._parse_primary) 2724 else: 2725 expressions = None 2726 num = self._parse_primary() 2727 2728 if self._match_text_seq("BUCKET"): 2729 bucket_numerator = self._parse_number() 2730 self._match_text_seq("OUT", "OF") 2731 bucket_denominator = bucket_denominator = self._parse_number() 2732 self._match(TokenType.ON) 2733 bucket_field = self._parse_field() 2734 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2735 percent = num 2736 elif self._match(TokenType.ROWS): 2737 rows = num 2738 elif num: 2739 size = num 2740 2741 self._match(TokenType.R_PAREN) 2742 2743 if self._match(TokenType.L_PAREN): 2744 method = self._parse_var() 2745 seed = self._match(TokenType.COMMA) and self._parse_number() 2746 self._match_r_paren() 2747 elif self._match_texts(("SEED", "REPEATABLE")): 2748 seed = self._parse_wrapped(self._parse_number) 2749 2750 return self.expression( 2751 exp.TableSample, 2752 expressions=expressions, 2753 method=method, 2754 bucket_numerator=bucket_numerator, 2755 bucket_denominator=bucket_denominator, 2756 bucket_field=bucket_field, 2757 percent=percent, 2758 rows=rows, 2759 size=size, 2760 seed=seed, 2761 kind=kind, 2762 ) 2763 2764 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2765 return list(iter(self._parse_pivot, None)) or None 2766 2767 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2768 return list(iter(self._parse_join, None)) or None 2769 2770 # https://duckdb.org/docs/sql/statements/pivot 2771 def _parse_simplified_pivot(self) -> exp.Pivot: 2772 def _parse_on() -> t.Optional[exp.Expression]: 2773 this = self._parse_bitwise() 2774 return self._parse_in(this) if self._match(TokenType.IN) else this 2775 2776 this = self._parse_table() 2777 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2778 using = self._match(TokenType.USING) and self._parse_csv( 2779 lambda: self._parse_alias(self._parse_function()) 2780 ) 2781 group = self._parse_group() 2782 return self.expression( 2783 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2784 ) 2785 2786 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2787 index = self._index 2788 include_nulls = None 2789 2790 if self._match(TokenType.PIVOT): 2791 unpivot = False 2792 elif self._match(TokenType.UNPIVOT): 2793 unpivot = True 2794 2795 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2796 if self._match_text_seq("INCLUDE", "NULLS"): 2797 include_nulls = True 2798 elif self._match_text_seq("EXCLUDE", "NULLS"): 2799 include_nulls = False 2800 else: 2801 return None 2802 2803 expressions = [] 2804 field = None 2805 2806 if not self._match(TokenType.L_PAREN): 2807 self._retreat(index) 2808 return None 2809 2810 if unpivot: 2811 expressions = self._parse_csv(self._parse_column) 2812 else: 2813 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2814 2815 if not expressions: 2816 self.raise_error("Failed to parse PIVOT's aggregation list") 2817 2818 if not self._match(TokenType.FOR): 2819 self.raise_error("Expecting FOR") 2820 2821 value = self._parse_column() 2822 2823 if not self._match(TokenType.IN): 2824 self.raise_error("Expecting IN") 2825 2826 field = self._parse_in(value, alias=True) 2827 2828 self._match_r_paren() 2829 2830 pivot = self.expression( 2831 exp.Pivot, 2832 expressions=expressions, 2833 field=field, 2834 unpivot=unpivot, 2835 include_nulls=include_nulls, 2836 ) 2837 2838 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2839 pivot.set("alias", self._parse_table_alias()) 2840 2841 if not unpivot: 2842 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2843 2844 columns: t.List[exp.Expression] = [] 2845 for fld in pivot.args["field"].expressions: 2846 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2847 for name in names: 2848 if self.PREFIXED_PIVOT_COLUMNS: 2849 name = f"{name}_{field_name}" if name else field_name 2850 else: 2851 name = f"{field_name}_{name}" if name else field_name 2852 2853 columns.append(exp.to_identifier(name)) 2854 2855 pivot.set("columns", columns) 2856 2857 return pivot 2858 2859 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2860 return [agg.alias for agg in aggregations] 2861 2862 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2863 if not skip_where_token and not self._match(TokenType.WHERE): 2864 return None 2865 2866 return self.expression( 2867 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2868 ) 2869 2870 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2871 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2872 return None 2873 2874 elements = defaultdict(list) 2875 2876 if self._match(TokenType.ALL): 2877 return self.expression(exp.Group, all=True) 2878 2879 while True: 2880 expressions = self._parse_csv(self._parse_conjunction) 2881 if expressions: 2882 elements["expressions"].extend(expressions) 2883 2884 grouping_sets = self._parse_grouping_sets() 2885 if grouping_sets: 2886 elements["grouping_sets"].extend(grouping_sets) 2887 2888 rollup = None 2889 cube = None 2890 totals = None 2891 2892 with_ = self._match(TokenType.WITH) 2893 if self._match(TokenType.ROLLUP): 2894 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2895 elements["rollup"].extend(ensure_list(rollup)) 2896 2897 if self._match(TokenType.CUBE): 2898 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2899 elements["cube"].extend(ensure_list(cube)) 2900 2901 if self._match_text_seq("TOTALS"): 2902 totals = True 2903 elements["totals"] = True # type: ignore 2904 2905 if not (grouping_sets or rollup or cube or totals): 2906 break 2907 2908 return self.expression(exp.Group, **elements) # type: ignore 2909 2910 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2911 if not self._match(TokenType.GROUPING_SETS): 2912 return None 2913 2914 return self._parse_wrapped_csv(self._parse_grouping_set) 2915 2916 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2917 if self._match(TokenType.L_PAREN): 2918 grouping_set = self._parse_csv(self._parse_column) 2919 self._match_r_paren() 2920 return self.expression(exp.Tuple, expressions=grouping_set) 2921 2922 return self._parse_column() 2923 2924 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2925 if not skip_having_token and not self._match(TokenType.HAVING): 2926 return None 2927 return self.expression(exp.Having, this=self._parse_conjunction()) 2928 2929 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2930 if not self._match(TokenType.QUALIFY): 2931 return None 2932 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2933 2934 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2935 if skip_start_token: 2936 start = None 2937 elif self._match(TokenType.START_WITH): 2938 start = self._parse_conjunction() 2939 else: 2940 return None 2941 2942 self._match(TokenType.CONNECT_BY) 2943 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2944 exp.Prior, this=self._parse_bitwise() 2945 ) 2946 connect = self._parse_conjunction() 2947 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2948 2949 if not start and self._match(TokenType.START_WITH): 2950 start = self._parse_conjunction() 2951 2952 return self.expression(exp.Connect, start=start, connect=connect) 2953 2954 def _parse_order( 2955 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2956 ) -> t.Optional[exp.Expression]: 2957 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2958 return this 2959 2960 return self.expression( 2961 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2962 ) 2963 2964 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2965 if not self._match(token): 2966 return None 2967 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2968 2969 def _parse_ordered(self) -> exp.Ordered: 2970 this = self._parse_conjunction() 2971 2972 asc = self._match(TokenType.ASC) 2973 desc = self._match(TokenType.DESC) or (asc and False) 2974 2975 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2976 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2977 2978 nulls_first = is_nulls_first or False 2979 explicitly_null_ordered = is_nulls_first or is_nulls_last 2980 2981 if ( 2982 not explicitly_null_ordered 2983 and ( 2984 (not desc and self.NULL_ORDERING == "nulls_are_small") 2985 or (desc and self.NULL_ORDERING != "nulls_are_small") 2986 ) 2987 and self.NULL_ORDERING != "nulls_are_last" 2988 ): 2989 nulls_first = True 2990 2991 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2992 2993 def _parse_limit( 2994 self, this: t.Optional[exp.Expression] = None, top: bool = False 2995 ) -> t.Optional[exp.Expression]: 2996 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2997 comments = self._prev_comments 2998 if top: 2999 limit_paren = self._match(TokenType.L_PAREN) 3000 expression = self._parse_number() 3001 3002 if limit_paren: 3003 self._match_r_paren() 3004 else: 3005 expression = self._parse_term() 3006 3007 if self._match(TokenType.COMMA): 3008 offset = expression 3009 expression = self._parse_term() 3010 else: 3011 offset = None 3012 3013 limit_exp = self.expression( 3014 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3015 ) 3016 3017 return limit_exp 3018 3019 if self._match(TokenType.FETCH): 3020 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3021 direction = self._prev.text if direction else "FIRST" 3022 3023 count = self._parse_field(tokens=self.FETCH_TOKENS) 3024 percent = self._match(TokenType.PERCENT) 3025 3026 self._match_set((TokenType.ROW, TokenType.ROWS)) 3027 3028 only = self._match_text_seq("ONLY") 3029 with_ties = self._match_text_seq("WITH", "TIES") 3030 3031 if only and with_ties: 3032 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3033 3034 return self.expression( 3035 exp.Fetch, 3036 direction=direction, 3037 count=count, 3038 percent=percent, 3039 with_ties=with_ties, 3040 ) 3041 3042 return this 3043 3044 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3045 if not self._match(TokenType.OFFSET): 3046 return this 3047 3048 count = self._parse_term() 3049 self._match_set((TokenType.ROW, TokenType.ROWS)) 3050 return self.expression(exp.Offset, this=this, expression=count) 3051 3052 def _parse_locks(self) -> t.List[exp.Lock]: 3053 locks = [] 3054 while True: 3055 if self._match_text_seq("FOR", "UPDATE"): 3056 update = True 3057 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3058 "LOCK", "IN", "SHARE", "MODE" 3059 ): 3060 update = False 3061 else: 3062 break 3063 3064 expressions = None 3065 if self._match_text_seq("OF"): 3066 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3067 3068 wait: t.Optional[bool | exp.Expression] = None 3069 if self._match_text_seq("NOWAIT"): 3070 wait = True 3071 elif self._match_text_seq("WAIT"): 3072 wait = self._parse_primary() 3073 elif self._match_text_seq("SKIP", "LOCKED"): 3074 wait = False 3075 3076 locks.append( 3077 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3078 ) 3079 3080 return locks 3081 3082 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3083 if not self._match_set(self.SET_OPERATIONS): 3084 return this 3085 3086 token_type = self._prev.token_type 3087 3088 if token_type == TokenType.UNION: 3089 expression = exp.Union 3090 elif token_type == TokenType.EXCEPT: 3091 expression = exp.Except 3092 else: 3093 expression = exp.Intersect 3094 3095 return self.expression( 3096 expression, 3097 this=this, 3098 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3099 by_name=self._match_text_seq("BY", "NAME"), 3100 expression=self._parse_set_operations(self._parse_select(nested=True)), 3101 ) 3102 3103 def _parse_expression(self) -> t.Optional[exp.Expression]: 3104 return self._parse_alias(self._parse_conjunction()) 3105 3106 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3107 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3108 3109 def _parse_equality(self) -> t.Optional[exp.Expression]: 3110 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3111 3112 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3113 return self._parse_tokens(self._parse_range, self.COMPARISON) 3114 3115 def _parse_range(self) -> t.Optional[exp.Expression]: 3116 this = self._parse_bitwise() 3117 negate = self._match(TokenType.NOT) 3118 3119 if self._match_set(self.RANGE_PARSERS): 3120 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3121 if not expression: 3122 return this 3123 3124 this = expression 3125 elif self._match(TokenType.ISNULL): 3126 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3127 3128 # Postgres supports ISNULL and NOTNULL for conditions. 3129 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3130 if self._match(TokenType.NOTNULL): 3131 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3132 this = self.expression(exp.Not, this=this) 3133 3134 if negate: 3135 this = self.expression(exp.Not, this=this) 3136 3137 if self._match(TokenType.IS): 3138 this = self._parse_is(this) 3139 3140 return this 3141 3142 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3143 index = self._index - 1 3144 negate = self._match(TokenType.NOT) 3145 3146 if self._match_text_seq("DISTINCT", "FROM"): 3147 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3148 return self.expression(klass, this=this, expression=self._parse_expression()) 3149 3150 expression = self._parse_null() or self._parse_boolean() 3151 if not expression: 3152 self._retreat(index) 3153 return None 3154 3155 this = self.expression(exp.Is, this=this, expression=expression) 3156 return self.expression(exp.Not, this=this) if negate else this 3157 3158 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3159 unnest = self._parse_unnest(with_alias=False) 3160 if unnest: 3161 this = self.expression(exp.In, this=this, unnest=unnest) 3162 elif self._match(TokenType.L_PAREN): 3163 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3164 3165 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3166 this = self.expression(exp.In, this=this, query=expressions[0]) 3167 else: 3168 this = self.expression(exp.In, this=this, expressions=expressions) 3169 3170 self._match_r_paren(this) 3171 else: 3172 this = self.expression(exp.In, this=this, field=self._parse_field()) 3173 3174 return this 3175 3176 def _parse_between(self, this: exp.Expression) -> exp.Between: 3177 low = self._parse_bitwise() 3178 self._match(TokenType.AND) 3179 high = self._parse_bitwise() 3180 return self.expression(exp.Between, this=this, low=low, high=high) 3181 3182 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3183 if not self._match(TokenType.ESCAPE): 3184 return this 3185 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3186 3187 def _parse_interval(self) -> t.Optional[exp.Interval]: 3188 index = self._index 3189 3190 if not self._match(TokenType.INTERVAL): 3191 return None 3192 3193 if self._match(TokenType.STRING, advance=False): 3194 this = self._parse_primary() 3195 else: 3196 this = self._parse_term() 3197 3198 if not this: 3199 self._retreat(index) 3200 return None 3201 3202 unit = self._parse_function() or self._parse_var(any_token=True) 3203 3204 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3205 # each INTERVAL expression into this canonical form so it's easy to transpile 3206 if this and this.is_number: 3207 this = exp.Literal.string(this.name) 3208 elif this and this.is_string: 3209 parts = this.name.split() 3210 3211 if len(parts) == 2: 3212 if unit: 3213 # This is not actually a unit, it's something else (e.g. a "window side") 3214 unit = None 3215 self._retreat(self._index - 1) 3216 3217 this = exp.Literal.string(parts[0]) 3218 unit = self.expression(exp.Var, this=parts[1]) 3219 3220 return self.expression(exp.Interval, this=this, unit=unit) 3221 3222 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3223 this = self._parse_term() 3224 3225 while True: 3226 if self._match_set(self.BITWISE): 3227 this = self.expression( 3228 self.BITWISE[self._prev.token_type], 3229 this=this, 3230 expression=self._parse_term(), 3231 ) 3232 elif self._match(TokenType.DQMARK): 3233 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3234 elif self._match_pair(TokenType.LT, TokenType.LT): 3235 this = self.expression( 3236 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3237 ) 3238 elif self._match_pair(TokenType.GT, TokenType.GT): 3239 this = self.expression( 3240 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3241 ) 3242 else: 3243 break 3244 3245 return this 3246 3247 def _parse_term(self) -> t.Optional[exp.Expression]: 3248 return self._parse_tokens(self._parse_factor, self.TERM) 3249 3250 def _parse_factor(self) -> t.Optional[exp.Expression]: 3251 return self._parse_tokens(self._parse_unary, self.FACTOR) 3252 3253 def _parse_unary(self) -> t.Optional[exp.Expression]: 3254 if self._match_set(self.UNARY_PARSERS): 3255 return self.UNARY_PARSERS[self._prev.token_type](self) 3256 return self._parse_at_time_zone(self._parse_type()) 3257 3258 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3259 interval = parse_interval and self._parse_interval() 3260 if interval: 3261 return interval 3262 3263 index = self._index 3264 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3265 this = self._parse_column() 3266 3267 if data_type: 3268 if isinstance(this, exp.Literal): 3269 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3270 if parser: 3271 return parser(self, this, data_type) 3272 return self.expression(exp.Cast, this=this, to=data_type) 3273 if not data_type.expressions: 3274 self._retreat(index) 3275 return self._parse_column() 3276 return self._parse_column_ops(data_type) 3277 3278 return this and self._parse_column_ops(this) 3279 3280 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3281 this = self._parse_type() 3282 if not this: 3283 return None 3284 3285 return self.expression( 3286 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3287 ) 3288 3289 def _parse_types( 3290 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3291 ) -> t.Optional[exp.Expression]: 3292 index = self._index 3293 3294 prefix = self._match_text_seq("SYSUDTLIB", ".") 3295 3296 if not self._match_set(self.TYPE_TOKENS): 3297 identifier = allow_identifiers and self._parse_id_var( 3298 any_token=False, tokens=(TokenType.VAR,) 3299 ) 3300 3301 if identifier: 3302 tokens = self._tokenizer.tokenize(identifier.name) 3303 3304 if len(tokens) != 1: 3305 self.raise_error("Unexpected identifier", self._prev) 3306 3307 if tokens[0].token_type in self.TYPE_TOKENS: 3308 self._prev = tokens[0] 3309 elif self.SUPPORTS_USER_DEFINED_TYPES: 3310 type_name = identifier.name 3311 3312 while self._match(TokenType.DOT): 3313 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3314 3315 return exp.DataType.build(type_name, udt=True) 3316 else: 3317 return None 3318 else: 3319 return None 3320 3321 type_token = self._prev.token_type 3322 3323 if type_token == TokenType.PSEUDO_TYPE: 3324 return self.expression(exp.PseudoType, this=self._prev.text) 3325 3326 if type_token == TokenType.OBJECT_IDENTIFIER: 3327 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3328 3329 nested = type_token in self.NESTED_TYPE_TOKENS 3330 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3331 expressions = None 3332 maybe_func = False 3333 3334 if self._match(TokenType.L_PAREN): 3335 if is_struct: 3336 expressions = self._parse_csv(self._parse_struct_types) 3337 elif nested: 3338 expressions = self._parse_csv( 3339 lambda: self._parse_types( 3340 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3341 ) 3342 ) 3343 elif type_token in self.ENUM_TYPE_TOKENS: 3344 expressions = self._parse_csv(self._parse_equality) 3345 else: 3346 expressions = self._parse_csv(self._parse_type_size) 3347 3348 if not expressions or not self._match(TokenType.R_PAREN): 3349 self._retreat(index) 3350 return None 3351 3352 maybe_func = True 3353 3354 this: t.Optional[exp.Expression] = None 3355 values: t.Optional[t.List[exp.Expression]] = None 3356 3357 if nested and self._match(TokenType.LT): 3358 if is_struct: 3359 expressions = self._parse_csv(self._parse_struct_types) 3360 else: 3361 expressions = self._parse_csv( 3362 lambda: self._parse_types( 3363 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3364 ) 3365 ) 3366 3367 if not self._match(TokenType.GT): 3368 self.raise_error("Expecting >") 3369 3370 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3371 values = self._parse_csv(self._parse_conjunction) 3372 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3373 3374 if type_token in self.TIMESTAMPS: 3375 if self._match_text_seq("WITH", "TIME", "ZONE"): 3376 maybe_func = False 3377 tz_type = ( 3378 exp.DataType.Type.TIMETZ 3379 if type_token in self.TIMES 3380 else exp.DataType.Type.TIMESTAMPTZ 3381 ) 3382 this = exp.DataType(this=tz_type, expressions=expressions) 3383 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3384 maybe_func = False 3385 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3386 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3387 maybe_func = False 3388 elif type_token == TokenType.INTERVAL: 3389 unit = self._parse_var() 3390 3391 if self._match_text_seq("TO"): 3392 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3393 else: 3394 span = None 3395 3396 if span or not unit: 3397 this = self.expression( 3398 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3399 ) 3400 else: 3401 this = self.expression(exp.Interval, unit=unit) 3402 3403 if maybe_func and check_func: 3404 index2 = self._index 3405 peek = self._parse_string() 3406 3407 if not peek: 3408 self._retreat(index) 3409 return None 3410 3411 self._retreat(index2) 3412 3413 if not this: 3414 if self._match_text_seq("UNSIGNED"): 3415 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3416 if not unsigned_type_token: 3417 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3418 3419 type_token = unsigned_type_token or type_token 3420 3421 this = exp.DataType( 3422 this=exp.DataType.Type[type_token.value], 3423 expressions=expressions, 3424 nested=nested, 3425 values=values, 3426 prefix=prefix, 3427 ) 3428 3429 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3430 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3431 3432 return this 3433 3434 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3435 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3436 self._match(TokenType.COLON) 3437 return self._parse_column_def(this) 3438 3439 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3440 if not self._match_text_seq("AT", "TIME", "ZONE"): 3441 return this 3442 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3443 3444 def _parse_column(self) -> t.Optional[exp.Expression]: 3445 this = self._parse_field() 3446 if isinstance(this, exp.Identifier): 3447 this = self.expression(exp.Column, this=this) 3448 elif not this: 3449 return self._parse_bracket(this) 3450 return self._parse_column_ops(this) 3451 3452 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3453 this = self._parse_bracket(this) 3454 3455 while self._match_set(self.COLUMN_OPERATORS): 3456 op_token = self._prev.token_type 3457 op = self.COLUMN_OPERATORS.get(op_token) 3458 3459 if op_token == TokenType.DCOLON: 3460 field = self._parse_types() 3461 if not field: 3462 self.raise_error("Expected type") 3463 elif op and self._curr: 3464 self._advance() 3465 value = self._prev.text 3466 field = ( 3467 exp.Literal.number(value) 3468 if self._prev.token_type == TokenType.NUMBER 3469 else exp.Literal.string(value) 3470 ) 3471 else: 3472 field = self._parse_field(anonymous_func=True, any_token=True) 3473 3474 if isinstance(field, exp.Func): 3475 # bigquery allows function calls like x.y.count(...) 3476 # SAFE.SUBSTR(...) 3477 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3478 this = self._replace_columns_with_dots(this) 3479 3480 if op: 3481 this = op(self, this, field) 3482 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3483 this = self.expression( 3484 exp.Column, 3485 this=field, 3486 table=this.this, 3487 db=this.args.get("table"), 3488 catalog=this.args.get("db"), 3489 ) 3490 else: 3491 this = self.expression(exp.Dot, this=this, expression=field) 3492 this = self._parse_bracket(this) 3493 return this 3494 3495 def _parse_primary(self) -> t.Optional[exp.Expression]: 3496 if self._match_set(self.PRIMARY_PARSERS): 3497 token_type = self._prev.token_type 3498 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3499 3500 if token_type == TokenType.STRING: 3501 expressions = [primary] 3502 while self._match(TokenType.STRING): 3503 expressions.append(exp.Literal.string(self._prev.text)) 3504 3505 if len(expressions) > 1: 3506 return self.expression(exp.Concat, expressions=expressions) 3507 3508 return primary 3509 3510 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3511 return exp.Literal.number(f"0.{self._prev.text}") 3512 3513 if self._match(TokenType.L_PAREN): 3514 comments = self._prev_comments 3515 query = self._parse_select() 3516 3517 if query: 3518 expressions = [query] 3519 else: 3520 expressions = self._parse_expressions() 3521 3522 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3523 3524 if isinstance(this, exp.Subqueryable): 3525 this = self._parse_set_operations( 3526 self._parse_subquery(this=this, parse_alias=False) 3527 ) 3528 elif len(expressions) > 1: 3529 this = self.expression(exp.Tuple, expressions=expressions) 3530 else: 3531 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3532 3533 if this: 3534 this.add_comments(comments) 3535 3536 self._match_r_paren(expression=this) 3537 return this 3538 3539 return None 3540 3541 def _parse_field( 3542 self, 3543 any_token: bool = False, 3544 tokens: t.Optional[t.Collection[TokenType]] = None, 3545 anonymous_func: bool = False, 3546 ) -> t.Optional[exp.Expression]: 3547 return ( 3548 self._parse_primary() 3549 or self._parse_function(anonymous=anonymous_func) 3550 or self._parse_id_var(any_token=any_token, tokens=tokens) 3551 ) 3552 3553 def _parse_function( 3554 self, 3555 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3556 anonymous: bool = False, 3557 optional_parens: bool = True, 3558 ) -> t.Optional[exp.Expression]: 3559 if not self._curr: 3560 return None 3561 3562 token_type = self._curr.token_type 3563 this = self._curr.text 3564 upper = this.upper() 3565 3566 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3567 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3568 self._advance() 3569 return parser(self) 3570 3571 if not self._next or self._next.token_type != TokenType.L_PAREN: 3572 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3573 self._advance() 3574 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3575 3576 return None 3577 3578 if token_type not in self.FUNC_TOKENS: 3579 return None 3580 3581 self._advance(2) 3582 3583 parser = self.FUNCTION_PARSERS.get(upper) 3584 if parser and not anonymous: 3585 this = parser(self) 3586 else: 3587 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3588 3589 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3590 this = self.expression(subquery_predicate, this=self._parse_select()) 3591 self._match_r_paren() 3592 return this 3593 3594 if functions is None: 3595 functions = self.FUNCTIONS 3596 3597 function = functions.get(upper) 3598 3599 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3600 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3601 3602 if function and not anonymous: 3603 func = self.validate_expression(function(args), args) 3604 if not self.NORMALIZE_FUNCTIONS: 3605 func.meta["name"] = this 3606 this = func 3607 else: 3608 this = self.expression(exp.Anonymous, this=this, expressions=args) 3609 3610 self._match_r_paren(this) 3611 return self._parse_window(this) 3612 3613 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3614 return self._parse_column_def(self._parse_id_var()) 3615 3616 def _parse_user_defined_function( 3617 self, kind: t.Optional[TokenType] = None 3618 ) -> t.Optional[exp.Expression]: 3619 this = self._parse_id_var() 3620 3621 while self._match(TokenType.DOT): 3622 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3623 3624 if not self._match(TokenType.L_PAREN): 3625 return this 3626 3627 expressions = self._parse_csv(self._parse_function_parameter) 3628 self._match_r_paren() 3629 return self.expression( 3630 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3631 ) 3632 3633 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3634 literal = self._parse_primary() 3635 if literal: 3636 return self.expression(exp.Introducer, this=token.text, expression=literal) 3637 3638 return self.expression(exp.Identifier, this=token.text) 3639 3640 def _parse_session_parameter(self) -> exp.SessionParameter: 3641 kind = None 3642 this = self._parse_id_var() or self._parse_primary() 3643 3644 if this and self._match(TokenType.DOT): 3645 kind = this.name 3646 this = self._parse_var() or self._parse_primary() 3647 3648 return self.expression(exp.SessionParameter, this=this, kind=kind) 3649 3650 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3651 index = self._index 3652 3653 if self._match(TokenType.L_PAREN): 3654 expressions = t.cast( 3655 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3656 ) 3657 3658 if not self._match(TokenType.R_PAREN): 3659 self._retreat(index) 3660 else: 3661 expressions = [self._parse_id_var()] 3662 3663 if self._match_set(self.LAMBDAS): 3664 return self.LAMBDAS[self._prev.token_type](self, expressions) 3665 3666 self._retreat(index) 3667 3668 this: t.Optional[exp.Expression] 3669 3670 if self._match(TokenType.DISTINCT): 3671 this = self.expression( 3672 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3673 ) 3674 else: 3675 this = self._parse_select_or_expression(alias=alias) 3676 3677 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3678 3679 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3680 index = self._index 3681 3682 if not self.errors: 3683 try: 3684 if self._parse_select(nested=True): 3685 return this 3686 except ParseError: 3687 pass 3688 finally: 3689 self.errors.clear() 3690 self._retreat(index) 3691 3692 if not self._match(TokenType.L_PAREN): 3693 return this 3694 3695 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3696 3697 self._match_r_paren() 3698 return self.expression(exp.Schema, this=this, expressions=args) 3699 3700 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3701 return self._parse_column_def(self._parse_field(any_token=True)) 3702 3703 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3704 # column defs are not really columns, they're identifiers 3705 if isinstance(this, exp.Column): 3706 this = this.this 3707 3708 kind = self._parse_types(schema=True) 3709 3710 if self._match_text_seq("FOR", "ORDINALITY"): 3711 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3712 3713 constraints: t.List[exp.Expression] = [] 3714 3715 if not kind and self._match(TokenType.ALIAS): 3716 constraints.append( 3717 self.expression( 3718 exp.ComputedColumnConstraint, 3719 this=self._parse_conjunction(), 3720 persisted=self._match_text_seq("PERSISTED"), 3721 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3722 ) 3723 ) 3724 3725 while True: 3726 constraint = self._parse_column_constraint() 3727 if not constraint: 3728 break 3729 constraints.append(constraint) 3730 3731 if not kind and not constraints: 3732 return this 3733 3734 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3735 3736 def _parse_auto_increment( 3737 self, 3738 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3739 start = None 3740 increment = None 3741 3742 if self._match(TokenType.L_PAREN, advance=False): 3743 args = self._parse_wrapped_csv(self._parse_bitwise) 3744 start = seq_get(args, 0) 3745 increment = seq_get(args, 1) 3746 elif self._match_text_seq("START"): 3747 start = self._parse_bitwise() 3748 self._match_text_seq("INCREMENT") 3749 increment = self._parse_bitwise() 3750 3751 if start and increment: 3752 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3753 3754 return exp.AutoIncrementColumnConstraint() 3755 3756 def _parse_compress(self) -> exp.CompressColumnConstraint: 3757 if self._match(TokenType.L_PAREN, advance=False): 3758 return self.expression( 3759 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3760 ) 3761 3762 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3763 3764 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3765 if self._match_text_seq("BY", "DEFAULT"): 3766 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3767 this = self.expression( 3768 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3769 ) 3770 else: 3771 self._match_text_seq("ALWAYS") 3772 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3773 3774 self._match(TokenType.ALIAS) 3775 identity = self._match_text_seq("IDENTITY") 3776 3777 if self._match(TokenType.L_PAREN): 3778 if self._match(TokenType.START_WITH): 3779 this.set("start", self._parse_bitwise()) 3780 if self._match_text_seq("INCREMENT", "BY"): 3781 this.set("increment", self._parse_bitwise()) 3782 if self._match_text_seq("MINVALUE"): 3783 this.set("minvalue", self._parse_bitwise()) 3784 if self._match_text_seq("MAXVALUE"): 3785 this.set("maxvalue", self._parse_bitwise()) 3786 3787 if self._match_text_seq("CYCLE"): 3788 this.set("cycle", True) 3789 elif self._match_text_seq("NO", "CYCLE"): 3790 this.set("cycle", False) 3791 3792 if not identity: 3793 this.set("expression", self._parse_bitwise()) 3794 3795 self._match_r_paren() 3796 3797 return this 3798 3799 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3800 self._match_text_seq("LENGTH") 3801 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3802 3803 def _parse_not_constraint( 3804 self, 3805 ) -> t.Optional[exp.Expression]: 3806 if self._match_text_seq("NULL"): 3807 return self.expression(exp.NotNullColumnConstraint) 3808 if self._match_text_seq("CASESPECIFIC"): 3809 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3810 if self._match_text_seq("FOR", "REPLICATION"): 3811 return self.expression(exp.NotForReplicationColumnConstraint) 3812 return None 3813 3814 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3815 if self._match(TokenType.CONSTRAINT): 3816 this = self._parse_id_var() 3817 else: 3818 this = None 3819 3820 if self._match_texts(self.CONSTRAINT_PARSERS): 3821 return self.expression( 3822 exp.ColumnConstraint, 3823 this=this, 3824 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3825 ) 3826 3827 return this 3828 3829 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3830 if not self._match(TokenType.CONSTRAINT): 3831 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3832 3833 this = self._parse_id_var() 3834 expressions = [] 3835 3836 while True: 3837 constraint = self._parse_unnamed_constraint() or self._parse_function() 3838 if not constraint: 3839 break 3840 expressions.append(constraint) 3841 3842 return self.expression(exp.Constraint, this=this, expressions=expressions) 3843 3844 def _parse_unnamed_constraint( 3845 self, constraints: t.Optional[t.Collection[str]] = None 3846 ) -> t.Optional[exp.Expression]: 3847 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3848 return None 3849 3850 constraint = self._prev.text.upper() 3851 if constraint not in self.CONSTRAINT_PARSERS: 3852 self.raise_error(f"No parser found for schema constraint {constraint}.") 3853 3854 return self.CONSTRAINT_PARSERS[constraint](self) 3855 3856 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3857 self._match_text_seq("KEY") 3858 return self.expression( 3859 exp.UniqueColumnConstraint, 3860 this=self._parse_schema(self._parse_id_var(any_token=False)), 3861 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3862 ) 3863 3864 def _parse_key_constraint_options(self) -> t.List[str]: 3865 options = [] 3866 while True: 3867 if not self._curr: 3868 break 3869 3870 if self._match(TokenType.ON): 3871 action = None 3872 on = self._advance_any() and self._prev.text 3873 3874 if self._match_text_seq("NO", "ACTION"): 3875 action = "NO ACTION" 3876 elif self._match_text_seq("CASCADE"): 3877 action = "CASCADE" 3878 elif self._match_text_seq("RESTRICT"): 3879 action = "RESTRICT" 3880 elif self._match_pair(TokenType.SET, TokenType.NULL): 3881 action = "SET NULL" 3882 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3883 action = "SET DEFAULT" 3884 else: 3885 self.raise_error("Invalid key constraint") 3886 3887 options.append(f"ON {on} {action}") 3888 elif self._match_text_seq("NOT", "ENFORCED"): 3889 options.append("NOT ENFORCED") 3890 elif self._match_text_seq("DEFERRABLE"): 3891 options.append("DEFERRABLE") 3892 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3893 options.append("INITIALLY DEFERRED") 3894 elif self._match_text_seq("NORELY"): 3895 options.append("NORELY") 3896 elif self._match_text_seq("MATCH", "FULL"): 3897 options.append("MATCH FULL") 3898 else: 3899 break 3900 3901 return options 3902 3903 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3904 if match and not self._match(TokenType.REFERENCES): 3905 return None 3906 3907 expressions = None 3908 this = self._parse_table(schema=True) 3909 options = self._parse_key_constraint_options() 3910 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3911 3912 def _parse_foreign_key(self) -> exp.ForeignKey: 3913 expressions = self._parse_wrapped_id_vars() 3914 reference = self._parse_references() 3915 options = {} 3916 3917 while self._match(TokenType.ON): 3918 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3919 self.raise_error("Expected DELETE or UPDATE") 3920 3921 kind = self._prev.text.lower() 3922 3923 if self._match_text_seq("NO", "ACTION"): 3924 action = "NO ACTION" 3925 elif self._match(TokenType.SET): 3926 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3927 action = "SET " + self._prev.text.upper() 3928 else: 3929 self._advance() 3930 action = self._prev.text.upper() 3931 3932 options[kind] = action 3933 3934 return self.expression( 3935 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3936 ) 3937 3938 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 3939 return self._parse_field() 3940 3941 def _parse_primary_key( 3942 self, wrapped_optional: bool = False, in_props: bool = False 3943 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3944 desc = ( 3945 self._match_set((TokenType.ASC, TokenType.DESC)) 3946 and self._prev.token_type == TokenType.DESC 3947 ) 3948 3949 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3950 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3951 3952 expressions = self._parse_wrapped_csv( 3953 self._parse_primary_key_part, optional=wrapped_optional 3954 ) 3955 options = self._parse_key_constraint_options() 3956 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3957 3958 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3959 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3960 return this 3961 3962 bracket_kind = self._prev.token_type 3963 3964 if self._match(TokenType.COLON): 3965 expressions: t.List[exp.Expression] = [ 3966 self.expression(exp.Slice, expression=self._parse_conjunction()) 3967 ] 3968 else: 3969 expressions = self._parse_csv( 3970 lambda: self._parse_slice( 3971 self._parse_alias(self._parse_conjunction(), explicit=True) 3972 ) 3973 ) 3974 3975 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3976 if bracket_kind == TokenType.L_BRACE: 3977 this = self.expression(exp.Struct, expressions=expressions) 3978 elif not this or this.name.upper() == "ARRAY": 3979 this = self.expression(exp.Array, expressions=expressions) 3980 else: 3981 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3982 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3983 3984 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3985 self.raise_error("Expected ]") 3986 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3987 self.raise_error("Expected }") 3988 3989 self._add_comments(this) 3990 return self._parse_bracket(this) 3991 3992 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3993 if self._match(TokenType.COLON): 3994 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3995 return this 3996 3997 def _parse_case(self) -> t.Optional[exp.Expression]: 3998 ifs = [] 3999 default = None 4000 4001 comments = self._prev_comments 4002 expression = self._parse_conjunction() 4003 4004 while self._match(TokenType.WHEN): 4005 this = self._parse_conjunction() 4006 self._match(TokenType.THEN) 4007 then = self._parse_conjunction() 4008 ifs.append(self.expression(exp.If, this=this, true=then)) 4009 4010 if self._match(TokenType.ELSE): 4011 default = self._parse_conjunction() 4012 4013 if not self._match(TokenType.END): 4014 self.raise_error("Expected END after CASE", self._prev) 4015 4016 return self._parse_window( 4017 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4018 ) 4019 4020 def _parse_if(self) -> t.Optional[exp.Expression]: 4021 if self._match(TokenType.L_PAREN): 4022 args = self._parse_csv(self._parse_conjunction) 4023 this = self.validate_expression(exp.If.from_arg_list(args), args) 4024 self._match_r_paren() 4025 else: 4026 index = self._index - 1 4027 condition = self._parse_conjunction() 4028 4029 if not condition: 4030 self._retreat(index) 4031 return None 4032 4033 self._match(TokenType.THEN) 4034 true = self._parse_conjunction() 4035 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4036 self._match(TokenType.END) 4037 this = self.expression(exp.If, this=condition, true=true, false=false) 4038 4039 return self._parse_window(this) 4040 4041 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4042 if not self._match_text_seq("VALUE", "FOR"): 4043 self._retreat(self._index - 1) 4044 return None 4045 4046 return self.expression( 4047 exp.NextValueFor, 4048 this=self._parse_column(), 4049 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4050 ) 4051 4052 def _parse_extract(self) -> exp.Extract: 4053 this = self._parse_function() or self._parse_var() or self._parse_type() 4054 4055 if self._match(TokenType.FROM): 4056 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4057 4058 if not self._match(TokenType.COMMA): 4059 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4060 4061 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4062 4063 def _parse_any_value(self) -> exp.AnyValue: 4064 this = self._parse_lambda() 4065 is_max = None 4066 having = None 4067 4068 if self._match(TokenType.HAVING): 4069 self._match_texts(("MAX", "MIN")) 4070 is_max = self._prev.text == "MAX" 4071 having = self._parse_column() 4072 4073 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4074 4075 def _parse_cast(self, strict: bool) -> exp.Expression: 4076 this = self._parse_conjunction() 4077 4078 if not self._match(TokenType.ALIAS): 4079 if self._match(TokenType.COMMA): 4080 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4081 4082 self.raise_error("Expected AS after CAST") 4083 4084 fmt = None 4085 to = self._parse_types() 4086 4087 if not to: 4088 self.raise_error("Expected TYPE after CAST") 4089 elif isinstance(to, exp.Identifier): 4090 to = exp.DataType.build(to.name, udt=True) 4091 elif to.this == exp.DataType.Type.CHAR: 4092 if self._match(TokenType.CHARACTER_SET): 4093 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4094 elif self._match(TokenType.FORMAT): 4095 fmt_string = self._parse_string() 4096 fmt = self._parse_at_time_zone(fmt_string) 4097 4098 if to.this in exp.DataType.TEMPORAL_TYPES: 4099 this = self.expression( 4100 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4101 this=this, 4102 format=exp.Literal.string( 4103 format_time( 4104 fmt_string.this if fmt_string else "", 4105 self.FORMAT_MAPPING or self.TIME_MAPPING, 4106 self.FORMAT_TRIE or self.TIME_TRIE, 4107 ) 4108 ), 4109 ) 4110 4111 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4112 this.set("zone", fmt.args["zone"]) 4113 4114 return this 4115 4116 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 4117 4118 def _parse_concat(self) -> t.Optional[exp.Expression]: 4119 args = self._parse_csv(self._parse_conjunction) 4120 if self.CONCAT_NULL_OUTPUTS_STRING: 4121 args = self._ensure_string_if_null(args) 4122 4123 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4124 # we find such a call we replace it with its argument. 4125 if len(args) == 1: 4126 return args[0] 4127 4128 return self.expression( 4129 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4130 ) 4131 4132 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4133 args = self._parse_csv(self._parse_conjunction) 4134 if len(args) < 2: 4135 return self.expression(exp.ConcatWs, expressions=args) 4136 delim, *values = args 4137 if self.CONCAT_NULL_OUTPUTS_STRING: 4138 values = self._ensure_string_if_null(values) 4139 4140 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4141 4142 def _parse_string_agg(self) -> exp.Expression: 4143 if self._match(TokenType.DISTINCT): 4144 args: t.List[t.Optional[exp.Expression]] = [ 4145 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4146 ] 4147 if self._match(TokenType.COMMA): 4148 args.extend(self._parse_csv(self._parse_conjunction)) 4149 else: 4150 args = self._parse_csv(self._parse_conjunction) # type: ignore 4151 4152 index = self._index 4153 if not self._match(TokenType.R_PAREN) and args: 4154 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4155 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4156 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4157 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4158 4159 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4160 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4161 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4162 if not self._match_text_seq("WITHIN", "GROUP"): 4163 self._retreat(index) 4164 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4165 4166 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4167 order = self._parse_order(this=seq_get(args, 0)) 4168 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4169 4170 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4171 this = self._parse_bitwise() 4172 4173 if self._match(TokenType.USING): 4174 to: t.Optional[exp.Expression] = self.expression( 4175 exp.CharacterSet, this=self._parse_var() 4176 ) 4177 elif self._match(TokenType.COMMA): 4178 to = self._parse_types() 4179 else: 4180 to = None 4181 4182 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4183 4184 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4185 """ 4186 There are generally two variants of the DECODE function: 4187 4188 - DECODE(bin, charset) 4189 - DECODE(expression, search, result [, search, result] ... [, default]) 4190 4191 The second variant will always be parsed into a CASE expression. Note that NULL 4192 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4193 instead of relying on pattern matching. 4194 """ 4195 args = self._parse_csv(self._parse_conjunction) 4196 4197 if len(args) < 3: 4198 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4199 4200 expression, *expressions = args 4201 if not expression: 4202 return None 4203 4204 ifs = [] 4205 for search, result in zip(expressions[::2], expressions[1::2]): 4206 if not search or not result: 4207 return None 4208 4209 if isinstance(search, exp.Literal): 4210 ifs.append( 4211 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4212 ) 4213 elif isinstance(search, exp.Null): 4214 ifs.append( 4215 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4216 ) 4217 else: 4218 cond = exp.or_( 4219 exp.EQ(this=expression.copy(), expression=search), 4220 exp.and_( 4221 exp.Is(this=expression.copy(), expression=exp.Null()), 4222 exp.Is(this=search.copy(), expression=exp.Null()), 4223 copy=False, 4224 ), 4225 copy=False, 4226 ) 4227 ifs.append(exp.If(this=cond, true=result)) 4228 4229 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4230 4231 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4232 self._match_text_seq("KEY") 4233 key = self._parse_column() 4234 self._match_set((TokenType.COLON, TokenType.COMMA)) 4235 self._match_text_seq("VALUE") 4236 value = self._parse_bitwise() 4237 4238 if not key and not value: 4239 return None 4240 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4241 4242 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4243 if not this or not self._match_text_seq("FORMAT", "JSON"): 4244 return this 4245 4246 return self.expression(exp.FormatJson, this=this) 4247 4248 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4249 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4250 for value in values: 4251 if self._match_text_seq(value, "ON", on): 4252 return f"{value} ON {on}" 4253 4254 return None 4255 4256 def _parse_json_object(self) -> exp.JSONObject: 4257 star = self._parse_star() 4258 expressions = ( 4259 [star] 4260 if star 4261 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4262 ) 4263 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4264 4265 unique_keys = None 4266 if self._match_text_seq("WITH", "UNIQUE"): 4267 unique_keys = True 4268 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4269 unique_keys = False 4270 4271 self._match_text_seq("KEYS") 4272 4273 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4274 self._parse_type() 4275 ) 4276 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4277 4278 return self.expression( 4279 exp.JSONObject, 4280 expressions=expressions, 4281 null_handling=null_handling, 4282 unique_keys=unique_keys, 4283 return_type=return_type, 4284 encoding=encoding, 4285 ) 4286 4287 def _parse_logarithm(self) -> exp.Func: 4288 # Default argument order is base, expression 4289 args = self._parse_csv(self._parse_range) 4290 4291 if len(args) > 1: 4292 if not self.LOG_BASE_FIRST: 4293 args.reverse() 4294 return exp.Log.from_arg_list(args) 4295 4296 return self.expression( 4297 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4298 ) 4299 4300 def _parse_match_against(self) -> exp.MatchAgainst: 4301 expressions = self._parse_csv(self._parse_column) 4302 4303 self._match_text_seq(")", "AGAINST", "(") 4304 4305 this = self._parse_string() 4306 4307 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4308 modifier = "IN NATURAL LANGUAGE MODE" 4309 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4310 modifier = f"{modifier} WITH QUERY EXPANSION" 4311 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4312 modifier = "IN BOOLEAN MODE" 4313 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4314 modifier = "WITH QUERY EXPANSION" 4315 else: 4316 modifier = None 4317 4318 return self.expression( 4319 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4320 ) 4321 4322 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4323 def _parse_open_json(self) -> exp.OpenJSON: 4324 this = self._parse_bitwise() 4325 path = self._match(TokenType.COMMA) and self._parse_string() 4326 4327 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4328 this = self._parse_field(any_token=True) 4329 kind = self._parse_types() 4330 path = self._parse_string() 4331 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4332 4333 return self.expression( 4334 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4335 ) 4336 4337 expressions = None 4338 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4339 self._match_l_paren() 4340 expressions = self._parse_csv(_parse_open_json_column_def) 4341 4342 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4343 4344 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4345 args = self._parse_csv(self._parse_bitwise) 4346 4347 if self._match(TokenType.IN): 4348 return self.expression( 4349 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4350 ) 4351 4352 if haystack_first: 4353 haystack = seq_get(args, 0) 4354 needle = seq_get(args, 1) 4355 else: 4356 needle = seq_get(args, 0) 4357 haystack = seq_get(args, 1) 4358 4359 return self.expression( 4360 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4361 ) 4362 4363 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4364 args = self._parse_csv(self._parse_table) 4365 return exp.JoinHint(this=func_name.upper(), expressions=args) 4366 4367 def _parse_substring(self) -> exp.Substring: 4368 # Postgres supports the form: substring(string [from int] [for int]) 4369 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4370 4371 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4372 4373 if self._match(TokenType.FROM): 4374 args.append(self._parse_bitwise()) 4375 if self._match(TokenType.FOR): 4376 args.append(self._parse_bitwise()) 4377 4378 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4379 4380 def _parse_trim(self) -> exp.Trim: 4381 # https://www.w3resource.com/sql/character-functions/trim.php 4382 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4383 4384 position = None 4385 collation = None 4386 4387 if self._match_texts(self.TRIM_TYPES): 4388 position = self._prev.text.upper() 4389 4390 expression = self._parse_bitwise() 4391 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4392 this = self._parse_bitwise() 4393 else: 4394 this = expression 4395 expression = None 4396 4397 if self._match(TokenType.COLLATE): 4398 collation = self._parse_bitwise() 4399 4400 return self.expression( 4401 exp.Trim, this=this, position=position, expression=expression, collation=collation 4402 ) 4403 4404 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4405 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4406 4407 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4408 return self._parse_window(self._parse_id_var(), alias=True) 4409 4410 def _parse_respect_or_ignore_nulls( 4411 self, this: t.Optional[exp.Expression] 4412 ) -> t.Optional[exp.Expression]: 4413 if self._match_text_seq("IGNORE", "NULLS"): 4414 return self.expression(exp.IgnoreNulls, this=this) 4415 if self._match_text_seq("RESPECT", "NULLS"): 4416 return self.expression(exp.RespectNulls, this=this) 4417 return this 4418 4419 def _parse_window( 4420 self, this: t.Optional[exp.Expression], alias: bool = False 4421 ) -> t.Optional[exp.Expression]: 4422 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4423 self._match(TokenType.WHERE) 4424 this = self.expression( 4425 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4426 ) 4427 self._match_r_paren() 4428 4429 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4430 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4431 if self._match_text_seq("WITHIN", "GROUP"): 4432 order = self._parse_wrapped(self._parse_order) 4433 this = self.expression(exp.WithinGroup, this=this, expression=order) 4434 4435 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4436 # Some dialects choose to implement and some do not. 4437 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4438 4439 # There is some code above in _parse_lambda that handles 4440 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4441 4442 # The below changes handle 4443 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4444 4445 # Oracle allows both formats 4446 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4447 # and Snowflake chose to do the same for familiarity 4448 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4449 this = self._parse_respect_or_ignore_nulls(this) 4450 4451 # bigquery select from window x AS (partition by ...) 4452 if alias: 4453 over = None 4454 self._match(TokenType.ALIAS) 4455 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4456 return this 4457 else: 4458 over = self._prev.text.upper() 4459 4460 if not self._match(TokenType.L_PAREN): 4461 return self.expression( 4462 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4463 ) 4464 4465 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4466 4467 first = self._match(TokenType.FIRST) 4468 if self._match_text_seq("LAST"): 4469 first = False 4470 4471 partition, order = self._parse_partition_and_order() 4472 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4473 4474 if kind: 4475 self._match(TokenType.BETWEEN) 4476 start = self._parse_window_spec() 4477 self._match(TokenType.AND) 4478 end = self._parse_window_spec() 4479 4480 spec = self.expression( 4481 exp.WindowSpec, 4482 kind=kind, 4483 start=start["value"], 4484 start_side=start["side"], 4485 end=end["value"], 4486 end_side=end["side"], 4487 ) 4488 else: 4489 spec = None 4490 4491 self._match_r_paren() 4492 4493 window = self.expression( 4494 exp.Window, 4495 this=this, 4496 partition_by=partition, 4497 order=order, 4498 spec=spec, 4499 alias=window_alias, 4500 over=over, 4501 first=first, 4502 ) 4503 4504 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4505 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4506 return self._parse_window(window, alias=alias) 4507 4508 return window 4509 4510 def _parse_partition_and_order( 4511 self, 4512 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4513 return self._parse_partition_by(), self._parse_order() 4514 4515 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4516 self._match(TokenType.BETWEEN) 4517 4518 return { 4519 "value": ( 4520 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4521 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4522 or self._parse_bitwise() 4523 ), 4524 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4525 } 4526 4527 def _parse_alias( 4528 self, this: t.Optional[exp.Expression], explicit: bool = False 4529 ) -> t.Optional[exp.Expression]: 4530 any_token = self._match(TokenType.ALIAS) 4531 4532 if explicit and not any_token: 4533 return this 4534 4535 if self._match(TokenType.L_PAREN): 4536 aliases = self.expression( 4537 exp.Aliases, 4538 this=this, 4539 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4540 ) 4541 self._match_r_paren(aliases) 4542 return aliases 4543 4544 alias = self._parse_id_var(any_token) 4545 4546 if alias: 4547 return self.expression(exp.Alias, this=this, alias=alias) 4548 4549 return this 4550 4551 def _parse_id_var( 4552 self, 4553 any_token: bool = True, 4554 tokens: t.Optional[t.Collection[TokenType]] = None, 4555 ) -> t.Optional[exp.Expression]: 4556 identifier = self._parse_identifier() 4557 4558 if identifier: 4559 return identifier 4560 4561 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4562 quoted = self._prev.token_type == TokenType.STRING 4563 return exp.Identifier(this=self._prev.text, quoted=quoted) 4564 4565 return None 4566 4567 def _parse_string(self) -> t.Optional[exp.Expression]: 4568 if self._match(TokenType.STRING): 4569 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4570 return self._parse_placeholder() 4571 4572 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4573 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4574 4575 def _parse_number(self) -> t.Optional[exp.Expression]: 4576 if self._match(TokenType.NUMBER): 4577 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4578 return self._parse_placeholder() 4579 4580 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4581 if self._match(TokenType.IDENTIFIER): 4582 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4583 return self._parse_placeholder() 4584 4585 def _parse_var( 4586 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4587 ) -> t.Optional[exp.Expression]: 4588 if ( 4589 (any_token and self._advance_any()) 4590 or self._match(TokenType.VAR) 4591 or (self._match_set(tokens) if tokens else False) 4592 ): 4593 return self.expression(exp.Var, this=self._prev.text) 4594 return self._parse_placeholder() 4595 4596 def _advance_any(self) -> t.Optional[Token]: 4597 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4598 self._advance() 4599 return self._prev 4600 return None 4601 4602 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4603 return self._parse_var() or self._parse_string() 4604 4605 def _parse_null(self) -> t.Optional[exp.Expression]: 4606 if self._match_set(self.NULL_TOKENS): 4607 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4608 return self._parse_placeholder() 4609 4610 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4611 if self._match(TokenType.TRUE): 4612 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4613 if self._match(TokenType.FALSE): 4614 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4615 return self._parse_placeholder() 4616 4617 def _parse_star(self) -> t.Optional[exp.Expression]: 4618 if self._match(TokenType.STAR): 4619 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4620 return self._parse_placeholder() 4621 4622 def _parse_parameter(self) -> exp.Parameter: 4623 wrapped = self._match(TokenType.L_BRACE) 4624 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4625 self._match(TokenType.R_BRACE) 4626 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4627 4628 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4629 if self._match_set(self.PLACEHOLDER_PARSERS): 4630 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4631 if placeholder: 4632 return placeholder 4633 self._advance(-1) 4634 return None 4635 4636 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4637 if not self._match(TokenType.EXCEPT): 4638 return None 4639 if self._match(TokenType.L_PAREN, advance=False): 4640 return self._parse_wrapped_csv(self._parse_column) 4641 4642 except_column = self._parse_column() 4643 return [except_column] if except_column else None 4644 4645 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4646 if not self._match(TokenType.REPLACE): 4647 return None 4648 if self._match(TokenType.L_PAREN, advance=False): 4649 return self._parse_wrapped_csv(self._parse_expression) 4650 4651 replace_expression = self._parse_expression() 4652 return [replace_expression] if replace_expression else None 4653 4654 def _parse_csv( 4655 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4656 ) -> t.List[exp.Expression]: 4657 parse_result = parse_method() 4658 items = [parse_result] if parse_result is not None else [] 4659 4660 while self._match(sep): 4661 self._add_comments(parse_result) 4662 parse_result = parse_method() 4663 if parse_result is not None: 4664 items.append(parse_result) 4665 4666 return items 4667 4668 def _parse_tokens( 4669 self, parse_method: t.Callable, expressions: t.Dict 4670 ) -> t.Optional[exp.Expression]: 4671 this = parse_method() 4672 4673 while self._match_set(expressions): 4674 this = self.expression( 4675 expressions[self._prev.token_type], 4676 this=this, 4677 comments=self._prev_comments, 4678 expression=parse_method(), 4679 ) 4680 4681 return this 4682 4683 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4684 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4685 4686 def _parse_wrapped_csv( 4687 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4688 ) -> t.List[exp.Expression]: 4689 return self._parse_wrapped( 4690 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4691 ) 4692 4693 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4694 wrapped = self._match(TokenType.L_PAREN) 4695 if not wrapped and not optional: 4696 self.raise_error("Expecting (") 4697 parse_result = parse_method() 4698 if wrapped: 4699 self._match_r_paren() 4700 return parse_result 4701 4702 def _parse_expressions(self) -> t.List[exp.Expression]: 4703 return self._parse_csv(self._parse_expression) 4704 4705 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4706 return self._parse_select() or self._parse_set_operations( 4707 self._parse_expression() if alias else self._parse_conjunction() 4708 ) 4709 4710 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4711 return self._parse_query_modifiers( 4712 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4713 ) 4714 4715 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4716 this = None 4717 if self._match_texts(self.TRANSACTION_KIND): 4718 this = self._prev.text 4719 4720 self._match_texts({"TRANSACTION", "WORK"}) 4721 4722 modes = [] 4723 while True: 4724 mode = [] 4725 while self._match(TokenType.VAR): 4726 mode.append(self._prev.text) 4727 4728 if mode: 4729 modes.append(" ".join(mode)) 4730 if not self._match(TokenType.COMMA): 4731 break 4732 4733 return self.expression(exp.Transaction, this=this, modes=modes) 4734 4735 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4736 chain = None 4737 savepoint = None 4738 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4739 4740 self._match_texts({"TRANSACTION", "WORK"}) 4741 4742 if self._match_text_seq("TO"): 4743 self._match_text_seq("SAVEPOINT") 4744 savepoint = self._parse_id_var() 4745 4746 if self._match(TokenType.AND): 4747 chain = not self._match_text_seq("NO") 4748 self._match_text_seq("CHAIN") 4749 4750 if is_rollback: 4751 return self.expression(exp.Rollback, savepoint=savepoint) 4752 4753 return self.expression(exp.Commit, chain=chain) 4754 4755 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4756 if not self._match_text_seq("ADD"): 4757 return None 4758 4759 self._match(TokenType.COLUMN) 4760 exists_column = self._parse_exists(not_=True) 4761 expression = self._parse_field_def() 4762 4763 if expression: 4764 expression.set("exists", exists_column) 4765 4766 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4767 if self._match_texts(("FIRST", "AFTER")): 4768 position = self._prev.text 4769 column_position = self.expression( 4770 exp.ColumnPosition, this=self._parse_column(), position=position 4771 ) 4772 expression.set("position", column_position) 4773 4774 return expression 4775 4776 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4777 drop = self._match(TokenType.DROP) and self._parse_drop() 4778 if drop and not isinstance(drop, exp.Command): 4779 drop.set("kind", drop.args.get("kind", "COLUMN")) 4780 return drop 4781 4782 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4783 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4784 return self.expression( 4785 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4786 ) 4787 4788 def _parse_add_constraint(self) -> exp.AddConstraint: 4789 this = None 4790 kind = self._prev.token_type 4791 4792 if kind == TokenType.CONSTRAINT: 4793 this = self._parse_id_var() 4794 4795 if self._match_text_seq("CHECK"): 4796 expression = self._parse_wrapped(self._parse_conjunction) 4797 enforced = self._match_text_seq("ENFORCED") 4798 4799 return self.expression( 4800 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4801 ) 4802 4803 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4804 expression = self._parse_foreign_key() 4805 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4806 expression = self._parse_primary_key() 4807 else: 4808 expression = None 4809 4810 return self.expression(exp.AddConstraint, this=this, expression=expression) 4811 4812 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4813 index = self._index - 1 4814 4815 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4816 return self._parse_csv(self._parse_add_constraint) 4817 4818 self._retreat(index) 4819 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4820 return self._parse_csv(self._parse_field_def) 4821 4822 return self._parse_csv(self._parse_add_column) 4823 4824 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4825 self._match(TokenType.COLUMN) 4826 column = self._parse_field(any_token=True) 4827 4828 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4829 return self.expression(exp.AlterColumn, this=column, drop=True) 4830 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4831 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4832 4833 self._match_text_seq("SET", "DATA") 4834 return self.expression( 4835 exp.AlterColumn, 4836 this=column, 4837 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4838 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4839 using=self._match(TokenType.USING) and self._parse_conjunction(), 4840 ) 4841 4842 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4843 index = self._index - 1 4844 4845 partition_exists = self._parse_exists() 4846 if self._match(TokenType.PARTITION, advance=False): 4847 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4848 4849 self._retreat(index) 4850 return self._parse_csv(self._parse_drop_column) 4851 4852 def _parse_alter_table_rename(self) -> exp.RenameTable: 4853 self._match_text_seq("TO") 4854 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4855 4856 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4857 start = self._prev 4858 4859 if not self._match(TokenType.TABLE): 4860 return self._parse_as_command(start) 4861 4862 exists = self._parse_exists() 4863 only = self._match_text_seq("ONLY") 4864 this = self._parse_table(schema=True) 4865 4866 if self._next: 4867 self._advance() 4868 4869 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4870 if parser: 4871 actions = ensure_list(parser(self)) 4872 4873 if not self._curr: 4874 return self.expression( 4875 exp.AlterTable, 4876 this=this, 4877 exists=exists, 4878 actions=actions, 4879 only=only, 4880 ) 4881 4882 return self._parse_as_command(start) 4883 4884 def _parse_merge(self) -> exp.Merge: 4885 self._match(TokenType.INTO) 4886 target = self._parse_table() 4887 4888 if target and self._match(TokenType.ALIAS, advance=False): 4889 target.set("alias", self._parse_table_alias()) 4890 4891 self._match(TokenType.USING) 4892 using = self._parse_table() 4893 4894 self._match(TokenType.ON) 4895 on = self._parse_conjunction() 4896 4897 whens = [] 4898 while self._match(TokenType.WHEN): 4899 matched = not self._match(TokenType.NOT) 4900 self._match_text_seq("MATCHED") 4901 source = ( 4902 False 4903 if self._match_text_seq("BY", "TARGET") 4904 else self._match_text_seq("BY", "SOURCE") 4905 ) 4906 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4907 4908 self._match(TokenType.THEN) 4909 4910 if self._match(TokenType.INSERT): 4911 _this = self._parse_star() 4912 if _this: 4913 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4914 else: 4915 then = self.expression( 4916 exp.Insert, 4917 this=self._parse_value(), 4918 expression=self._match(TokenType.VALUES) and self._parse_value(), 4919 ) 4920 elif self._match(TokenType.UPDATE): 4921 expressions = self._parse_star() 4922 if expressions: 4923 then = self.expression(exp.Update, expressions=expressions) 4924 else: 4925 then = self.expression( 4926 exp.Update, 4927 expressions=self._match(TokenType.SET) 4928 and self._parse_csv(self._parse_equality), 4929 ) 4930 elif self._match(TokenType.DELETE): 4931 then = self.expression(exp.Var, this=self._prev.text) 4932 else: 4933 then = None 4934 4935 whens.append( 4936 self.expression( 4937 exp.When, 4938 matched=matched, 4939 source=source, 4940 condition=condition, 4941 then=then, 4942 ) 4943 ) 4944 4945 return self.expression( 4946 exp.Merge, 4947 this=target, 4948 using=using, 4949 on=on, 4950 expressions=whens, 4951 ) 4952 4953 def _parse_show(self) -> t.Optional[exp.Expression]: 4954 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4955 if parser: 4956 return parser(self) 4957 return self._parse_as_command(self._prev) 4958 4959 def _parse_set_item_assignment( 4960 self, kind: t.Optional[str] = None 4961 ) -> t.Optional[exp.Expression]: 4962 index = self._index 4963 4964 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4965 return self._parse_set_transaction(global_=kind == "GLOBAL") 4966 4967 left = self._parse_primary() or self._parse_id_var() 4968 assignment_delimiter = self._match_texts(("=", "TO")) 4969 4970 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 4971 self._retreat(index) 4972 return None 4973 4974 right = self._parse_statement() or self._parse_id_var() 4975 this = self.expression(exp.EQ, this=left, expression=right) 4976 4977 return self.expression(exp.SetItem, this=this, kind=kind) 4978 4979 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4980 self._match_text_seq("TRANSACTION") 4981 characteristics = self._parse_csv( 4982 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4983 ) 4984 return self.expression( 4985 exp.SetItem, 4986 expressions=characteristics, 4987 kind="TRANSACTION", 4988 **{"global": global_}, # type: ignore 4989 ) 4990 4991 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4992 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4993 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4994 4995 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4996 index = self._index 4997 set_ = self.expression( 4998 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4999 ) 5000 5001 if self._curr: 5002 self._retreat(index) 5003 return self._parse_as_command(self._prev) 5004 5005 return set_ 5006 5007 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5008 for option in options: 5009 if self._match_text_seq(*option.split(" ")): 5010 return exp.var(option) 5011 return None 5012 5013 def _parse_as_command(self, start: Token) -> exp.Command: 5014 while self._curr: 5015 self._advance() 5016 text = self._find_sql(start, self._prev) 5017 size = len(start.text) 5018 return exp.Command(this=text[:size], expression=text[size:]) 5019 5020 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5021 settings = [] 5022 5023 self._match_l_paren() 5024 kind = self._parse_id_var() 5025 5026 if self._match(TokenType.L_PAREN): 5027 while True: 5028 key = self._parse_id_var() 5029 value = self._parse_primary() 5030 5031 if not key and value is None: 5032 break 5033 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5034 self._match(TokenType.R_PAREN) 5035 5036 self._match_r_paren() 5037 5038 return self.expression( 5039 exp.DictProperty, 5040 this=this, 5041 kind=kind.this if kind else None, 5042 settings=settings, 5043 ) 5044 5045 def _parse_dict_range(self, this: str) -> exp.DictRange: 5046 self._match_l_paren() 5047 has_min = self._match_text_seq("MIN") 5048 if has_min: 5049 min = self._parse_var() or self._parse_primary() 5050 self._match_text_seq("MAX") 5051 max = self._parse_var() or self._parse_primary() 5052 else: 5053 max = self._parse_var() or self._parse_primary() 5054 min = exp.Literal.number(0) 5055 self._match_r_paren() 5056 return self.expression(exp.DictRange, this=this, min=min, max=max) 5057 5058 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5059 index = self._index 5060 expression = self._parse_column() 5061 if not self._match(TokenType.IN): 5062 self._retreat(index - 1) 5063 return None 5064 iterator = self._parse_column() 5065 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5066 return self.expression( 5067 exp.Comprehension, 5068 this=this, 5069 expression=expression, 5070 iterator=iterator, 5071 condition=condition, 5072 ) 5073 5074 def _find_parser( 5075 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5076 ) -> t.Optional[t.Callable]: 5077 if not self._curr: 5078 return None 5079 5080 index = self._index 5081 this = [] 5082 while True: 5083 # The current token might be multiple words 5084 curr = self._curr.text.upper() 5085 key = curr.split(" ") 5086 this.append(curr) 5087 5088 self._advance() 5089 result, trie = in_trie(trie, key) 5090 if result == TrieResult.FAILED: 5091 break 5092 5093 if result == TrieResult.EXISTS: 5094 subparser = parsers[" ".join(this)] 5095 return subparser 5096 5097 self._retreat(index) 5098 return None 5099 5100 def _match(self, token_type, advance=True, expression=None): 5101 if not self._curr: 5102 return None 5103 5104 if self._curr.token_type == token_type: 5105 if advance: 5106 self._advance() 5107 self._add_comments(expression) 5108 return True 5109 5110 return None 5111 5112 def _match_set(self, types, advance=True): 5113 if not self._curr: 5114 return None 5115 5116 if self._curr.token_type in types: 5117 if advance: 5118 self._advance() 5119 return True 5120 5121 return None 5122 5123 def _match_pair(self, token_type_a, token_type_b, advance=True): 5124 if not self._curr or not self._next: 5125 return None 5126 5127 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5128 if advance: 5129 self._advance(2) 5130 return True 5131 5132 return None 5133 5134 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5135 if not self._match(TokenType.L_PAREN, expression=expression): 5136 self.raise_error("Expecting (") 5137 5138 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5139 if not self._match(TokenType.R_PAREN, expression=expression): 5140 self.raise_error("Expecting )") 5141 5142 def _match_texts(self, texts, advance=True): 5143 if self._curr and self._curr.text.upper() in texts: 5144 if advance: 5145 self._advance() 5146 return True 5147 return False 5148 5149 def _match_text_seq(self, *texts, advance=True): 5150 index = self._index 5151 for text in texts: 5152 if self._curr and self._curr.text.upper() == text: 5153 self._advance() 5154 else: 5155 self._retreat(index) 5156 return False 5157 5158 if not advance: 5159 self._retreat(index) 5160 5161 return True 5162 5163 @t.overload 5164 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5165 ... 5166 5167 @t.overload 5168 def _replace_columns_with_dots( 5169 self, this: t.Optional[exp.Expression] 5170 ) -> t.Optional[exp.Expression]: 5171 ... 5172 5173 def _replace_columns_with_dots(self, this): 5174 if isinstance(this, exp.Dot): 5175 exp.replace_children(this, self._replace_columns_with_dots) 5176 elif isinstance(this, exp.Column): 5177 exp.replace_children(this, self._replace_columns_with_dots) 5178 table = this.args.get("table") 5179 this = ( 5180 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5181 ) 5182 5183 return this 5184 5185 def _replace_lambda( 5186 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5187 ) -> t.Optional[exp.Expression]: 5188 if not node: 5189 return node 5190 5191 for column in node.find_all(exp.Column): 5192 if column.parts[0].name in lambda_variables: 5193 dot_or_id = column.to_dot() if column.table else column.this 5194 parent = column.parent 5195 5196 while isinstance(parent, exp.Dot): 5197 if not isinstance(parent.parent, exp.Dot): 5198 parent.replace(dot_or_id) 5199 break 5200 parent = parent.parent 5201 else: 5202 if column is node: 5203 node = dot_or_id 5204 else: 5205 column.replace(dot_or_id) 5206 return node 5207 5208 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5209 return [ 5210 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5211 for value in values 5212 if value 5213 ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
918 def __init__( 919 self, 920 error_level: t.Optional[ErrorLevel] = None, 921 error_message_context: int = 100, 922 max_errors: int = 3, 923 ): 924 self.error_level = error_level or ErrorLevel.IMMEDIATE 925 self.error_message_context = error_message_context 926 self.max_errors = max_errors 927 self._tokenizer = self.TOKENIZER_CLASS() 928 self.reset()
940 def parse( 941 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 942 ) -> t.List[t.Optional[exp.Expression]]: 943 """ 944 Parses a list of tokens and returns a list of syntax trees, one tree 945 per parsed SQL statement. 946 947 Args: 948 raw_tokens: The list of tokens. 949 sql: The original SQL string, used to produce helpful debug messages. 950 951 Returns: 952 The list of the produced syntax trees. 953 """ 954 return self._parse( 955 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 956 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
958 def parse_into( 959 self, 960 expression_types: exp.IntoType, 961 raw_tokens: t.List[Token], 962 sql: t.Optional[str] = None, 963 ) -> t.List[t.Optional[exp.Expression]]: 964 """ 965 Parses a list of tokens into a given Expression type. If a collection of Expression 966 types is given instead, this method will try to parse the token list into each one 967 of them, stopping at the first for which the parsing succeeds. 968 969 Args: 970 expression_types: The expression type(s) to try and parse the token list into. 971 raw_tokens: The list of tokens. 972 sql: The original SQL string, used to produce helpful debug messages. 973 974 Returns: 975 The target Expression. 976 """ 977 errors = [] 978 for expression_type in ensure_list(expression_types): 979 parser = self.EXPRESSION_PARSERS.get(expression_type) 980 if not parser: 981 raise TypeError(f"No parser registered for {expression_type}") 982 983 try: 984 return self._parse(parser, raw_tokens, sql) 985 except ParseError as e: 986 e.errors[0]["into_expression"] = expression_type 987 errors.append(e) 988 989 raise ParseError( 990 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 991 errors=merge_errors(errors), 992 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1029 def check_errors(self) -> None: 1030 """Logs or raises any found errors, depending on the chosen error level setting.""" 1031 if self.error_level == ErrorLevel.WARN: 1032 for error in self.errors: 1033 logger.error(str(error)) 1034 elif self.error_level == ErrorLevel.RAISE and self.errors: 1035 raise ParseError( 1036 concat_messages(self.errors, self.max_errors), 1037 errors=merge_errors(self.errors), 1038 )
Logs or raises any found errors, depending on the chosen error level setting.
1040 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1041 """ 1042 Appends an error in the list of recorded errors or raises it, depending on the chosen 1043 error level setting. 1044 """ 1045 token = token or self._curr or self._prev or Token.string("") 1046 start = token.start 1047 end = token.end + 1 1048 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1049 highlight = self.sql[start:end] 1050 end_context = self.sql[end : end + self.error_message_context] 1051 1052 error = ParseError.new( 1053 f"{message}. Line {token.line}, Col: {token.col}.\n" 1054 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1055 description=message, 1056 line=token.line, 1057 col=token.col, 1058 start_context=start_context, 1059 highlight=highlight, 1060 end_context=end_context, 1061 ) 1062 1063 if self.error_level == ErrorLevel.IMMEDIATE: 1064 raise error 1065 1066 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1068 def expression( 1069 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1070 ) -> E: 1071 """ 1072 Creates a new, validated Expression. 1073 1074 Args: 1075 exp_class: The expression class to instantiate. 1076 comments: An optional list of comments to attach to the expression. 1077 kwargs: The arguments to set for the expression along with their respective values. 1078 1079 Returns: 1080 The target expression. 1081 """ 1082 instance = exp_class(**kwargs) 1083 instance.add_comments(comments) if comments else self._add_comments(instance) 1084 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1091 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1092 """ 1093 Validates an Expression, making sure that all its mandatory arguments are set. 1094 1095 Args: 1096 expression: The expression to validate. 1097 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1098 1099 Returns: 1100 The validated expression. 1101 """ 1102 if self.error_level != ErrorLevel.IGNORE: 1103 for error_message in expression.error_messages(args): 1104 self.raise_error(error_message) 1105 1106 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.