sqlglot.parser
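# Example usage (an illustrative sketch; sqlglot's public API normally drives
# this class, but it can be exercised directly with the default tokenizer):
#
#     from sqlglot.parser import Parser
#     from sqlglot.tokens import Tokenizer
#
#     sql = "SELECT a FROM b"
#     expressions = Parser().parse(Tokenizer().tokenize(sql), sql=sql)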
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
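        # FETCH is handled like LIMIT: both fill the "limit" modifier slot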
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    SUPPORTS_USER_DEFINED_TYPES = True

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
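    # Like the fields above, these are expected to be filled in by the dialect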
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
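
        Example (an illustrative sketch, assuming the default tokenizer and dialect):
            >>> from sqlglot import exp
            >>> from sqlglot.parser import Parser
            >>> from sqlglot.tokens import Tokenizer
            >>> sql = "WHERE x > 1"
            >>> where = Parser().parse_into(exp.Where, Tokenizer().tokenize(sql), sql)[0]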
959 """ 960 errors = [] 961 for expression_type in ensure_list(expression_types): 962 parser = self.EXPRESSION_PARSERS.get(expression_type) 963 if not parser: 964 raise TypeError(f"No parser registered for {expression_type}") 965 966 try: 967 return self._parse(parser, raw_tokens, sql) 968 except ParseError as e: 969 e.errors[0]["into_expression"] = expression_type 970 errors.append(e) 971 972 raise ParseError( 973 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 974 errors=merge_errors(errors), 975 ) from errors[-1] 976 977 def _parse( 978 self, 979 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 980 raw_tokens: t.List[Token], 981 sql: t.Optional[str] = None, 982 ) -> t.List[t.Optional[exp.Expression]]: 983 self.reset() 984 self.sql = sql or "" 985 986 total = len(raw_tokens) 987 chunks: t.List[t.List[Token]] = [[]] 988 989 for i, token in enumerate(raw_tokens): 990 if token.token_type == TokenType.SEMICOLON: 991 if i < total - 1: 992 chunks.append([]) 993 else: 994 chunks[-1].append(token) 995 996 expressions = [] 997 998 for tokens in chunks: 999 self._index = -1 1000 self._tokens = tokens 1001 self._advance() 1002 1003 expressions.append(parse_method(self)) 1004 1005 if self._index < len(self._tokens): 1006 self.raise_error("Invalid expression / Unexpected token") 1007 1008 self.check_errors() 1009 1010 return expressions 1011 1012 def check_errors(self) -> None: 1013 """Logs or raises any found errors, depending on the chosen error level setting.""" 1014 if self.error_level == ErrorLevel.WARN: 1015 for error in self.errors: 1016 logger.error(str(error)) 1017 elif self.error_level == ErrorLevel.RAISE and self.errors: 1018 raise ParseError( 1019 concat_messages(self.errors, self.max_errors), 1020 errors=merge_errors(self.errors), 1021 ) 1022 1023 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1024 """ 1025 Appends an error in the list of recorded errors or raises it, depending on the chosen 1026 error level setting. 1027 """ 1028 token = token or self._curr or self._prev or Token.string("") 1029 start = token.start 1030 end = token.end + 1 1031 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1032 highlight = self.sql[start:end] 1033 end_context = self.sql[end : end + self.error_message_context] 1034 1035 error = ParseError.new( 1036 f"{message}. Line {token.line}, Col: {token.col}.\n" 1037 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1038 description=message, 1039 line=token.line, 1040 col=token.col, 1041 start_context=start_context, 1042 highlight=highlight, 1043 end_context=end_context, 1044 ) 1045 1046 if self.error_level == ErrorLevel.IMMEDIATE: 1047 raise error 1048 1049 self.errors.append(error) 1050 1051 def expression( 1052 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1053 ) -> E: 1054 """ 1055 Creates a new, validated Expression. 1056 1057 Args: 1058 exp_class: The expression class to instantiate. 1059 comments: An optional list of comments to attach to the expression. 1060 kwargs: The arguments to set for the expression along with their respective values. 1061 1062 Returns: 1063 The target expression. 
1064 """ 1065 instance = exp_class(**kwargs) 1066 instance.add_comments(comments) if comments else self._add_comments(instance) 1067 return self.validate_expression(instance) 1068 1069 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1070 if expression and self._prev_comments: 1071 expression.add_comments(self._prev_comments) 1072 self._prev_comments = None 1073 1074 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1075 """ 1076 Validates an Expression, making sure that all its mandatory arguments are set. 1077 1078 Args: 1079 expression: The expression to validate. 1080 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1081 1082 Returns: 1083 The validated expression. 1084 """ 1085 if self.error_level != ErrorLevel.IGNORE: 1086 for error_message in expression.error_messages(args): 1087 self.raise_error(error_message) 1088 1089 return expression 1090 1091 def _find_sql(self, start: Token, end: Token) -> str: 1092 return self.sql[start.start : end.end + 1] 1093 1094 def _advance(self, times: int = 1) -> None: 1095 self._index += times 1096 self._curr = seq_get(self._tokens, self._index) 1097 self._next = seq_get(self._tokens, self._index + 1) 1098 1099 if self._index > 0: 1100 self._prev = self._tokens[self._index - 1] 1101 self._prev_comments = self._prev.comments 1102 else: 1103 self._prev = None 1104 self._prev_comments = None 1105 1106 def _retreat(self, index: int) -> None: 1107 if index != self._index: 1108 self._advance(index - self._index) 1109 1110 def _parse_command(self) -> exp.Command: 1111 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1112 1113 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1114 start = self._prev 1115 exists = self._parse_exists() if allow_exists else None 1116 1117 self._match(TokenType.ON) 1118 1119 kind = self._match_set(self.CREATABLES) and self._prev 1120 if not kind: 1121 return self._parse_as_command(start) 1122 1123 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1124 this = self._parse_user_defined_function(kind=kind.token_type) 1125 elif kind.token_type == TokenType.TABLE: 1126 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1127 elif kind.token_type == TokenType.COLUMN: 1128 this = self._parse_column() 1129 else: 1130 this = self._parse_id_var() 1131 1132 self._match(TokenType.IS) 1133 1134 return self.expression( 1135 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1136 ) 1137 1138 def _parse_to_table( 1139 self, 1140 ) -> exp.ToTableProperty: 1141 table = self._parse_table_parts(schema=True) 1142 return self.expression(exp.ToTableProperty, this=table) 1143 1144 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1145 def _parse_ttl(self) -> exp.Expression: 1146 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1147 this = self._parse_bitwise() 1148 1149 if self._match_text_seq("DELETE"): 1150 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1151 if self._match_text_seq("RECOMPRESS"): 1152 return self.expression( 1153 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1154 ) 1155 if self._match_text_seq("TO", "DISK"): 1156 return self.expression( 1157 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1158 ) 1159 if self._match_text_seq("TO", "VOLUME"): 1160 return self.expression( 1161 

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

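            # AS (TokenType.ALIAS) introduces the function/procedure body parsed below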
            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(
                exp.Property,
                this=key,
                value=self._parse_column() or self._parse_var(any_token=True),
            )

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()
    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
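        # BLOCKCOMPRESSION takes one of ALWAYS/MANUAL/NEVER/DEFAULT, optionally with AUTOTEMP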
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))
self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1714 1715 def _parse_distkey(self) -> exp.DistKeyProperty: 1716 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1717 1718 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1719 table = self._parse_table(schema=True) 1720 1721 options = [] 1722 while self._match_texts(("INCLUDING", "EXCLUDING")): 1723 this = self._prev.text.upper() 1724 1725 id_var = self._parse_id_var() 1726 if not id_var: 1727 return None 1728 1729 options.append( 1730 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1731 ) 1732 1733 return self.expression(exp.LikeProperty, this=table, expressions=options) 1734 1735 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1736 return self.expression( 1737 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1738 ) 1739 1740 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1741 self._match(TokenType.EQ) 1742 return self.expression( 1743 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1744 ) 1745 1746 def _parse_returns(self) -> exp.ReturnsProperty: 1747 value: t.Optional[exp.Expression] 1748 is_table = self._match(TokenType.TABLE) 1749 1750 if is_table: 1751 if self._match(TokenType.LT): 1752 value = self.expression( 1753 exp.Schema, 1754 this="TABLE", 1755 expressions=self._parse_csv(self._parse_struct_types), 1756 ) 1757 if not self._match(TokenType.GT): 1758 self.raise_error("Expecting >") 1759 else: 1760 value = self._parse_schema(exp.var("TABLE")) 1761 else: 1762 value = self._parse_types() 1763 1764 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1765 1766 def _parse_describe(self) -> exp.Describe: 1767 kind = self._match_set(self.CREATABLES) and self._prev.text 1768 this = self._parse_table(schema=True) 1769 properties = self._parse_properties() 1770 expressions = properties.expressions if properties else None 1771 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1772 1773 def _parse_insert(self) -> exp.Insert: 1774 comments = ensure_list(self._prev_comments) 1775 overwrite = self._match(TokenType.OVERWRITE) 1776 ignore = self._match(TokenType.IGNORE) 1777 local = self._match_text_seq("LOCAL") 1778 alternative = None 1779 1780 if self._match_text_seq("DIRECTORY"): 1781 this: t.Optional[exp.Expression] = self.expression( 1782 exp.Directory, 1783 this=self._parse_var_or_string(), 1784 local=local, 1785 row_format=self._parse_row_format(match_row=True), 1786 ) 1787 else: 1788 if self._match(TokenType.OR): 1789 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1790 1791 self._match(TokenType.INTO) 1792 comments += ensure_list(self._prev_comments) 1793 self._match(TokenType.TABLE) 1794 this = self._parse_table(schema=True) 1795 1796 returning = self._parse_returning() 1797 1798 return self.expression( 1799 exp.Insert, 1800 comments=comments, 1801 this=this, 1802 by_name=self._match_text_seq("BY", "NAME"), 1803 exists=self._parse_exists(), 1804 partition=self._parse_partition(), 1805 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1806 and self._parse_conjunction(), 1807 expression=self._parse_ddl_select(), 1808 conflict=self._parse_on_conflict(), 1809 returning=returning or self._parse_returning(), 1810 overwrite=overwrite, 1811 alternative=alternative, 1812 ignore=ignore, 1813 ) 1814 1815 def 
_parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1816 conflict = self._match_text_seq("ON", "CONFLICT") 1817 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1818 1819 if not conflict and not duplicate: 1820 return None 1821 1822 nothing = None 1823 expressions = None 1824 key = None 1825 constraint = None 1826 1827 if conflict: 1828 if self._match_text_seq("ON", "CONSTRAINT"): 1829 constraint = self._parse_id_var() 1830 else: 1831 key = self._parse_csv(self._parse_value) 1832 1833 self._match_text_seq("DO") 1834 if self._match_text_seq("NOTHING"): 1835 nothing = True 1836 else: 1837 self._match(TokenType.UPDATE) 1838 self._match(TokenType.SET) 1839 expressions = self._parse_csv(self._parse_equality) 1840 1841 return self.expression( 1842 exp.OnConflict, 1843 duplicate=duplicate, 1844 expressions=expressions, 1845 nothing=nothing, 1846 key=key, 1847 constraint=constraint, 1848 ) 1849 1850 def _parse_returning(self) -> t.Optional[exp.Returning]: 1851 if not self._match(TokenType.RETURNING): 1852 return None 1853 return self.expression( 1854 exp.Returning, 1855 expressions=self._parse_csv(self._parse_expression), 1856 into=self._match(TokenType.INTO) and self._parse_table_part(), 1857 ) 1858 1859 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1860 if not self._match(TokenType.FORMAT): 1861 return None 1862 return self._parse_row_format() 1863 1864 def _parse_row_format( 1865 self, match_row: bool = False 1866 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1867 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1868 return None 1869 1870 if self._match_text_seq("SERDE"): 1871 this = self._parse_string() 1872 1873 serde_properties = None 1874 if self._match(TokenType.SERDE_PROPERTIES): 1875 serde_properties = self.expression( 1876 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1877 ) 1878 1879 return self.expression( 1880 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1881 ) 1882 1883 self._match_text_seq("DELIMITED") 1884 1885 kwargs = {} 1886 1887 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1888 kwargs["fields"] = self._parse_string() 1889 if self._match_text_seq("ESCAPED", "BY"): 1890 kwargs["escaped"] = self._parse_string() 1891 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1892 kwargs["collection_items"] = self._parse_string() 1893 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1894 kwargs["map_keys"] = self._parse_string() 1895 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1896 kwargs["lines"] = self._parse_string() 1897 if self._match_text_seq("NULL", "DEFINED", "AS"): 1898 kwargs["null"] = self._parse_string() 1899 1900 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1901 1902 def _parse_load(self) -> exp.LoadData | exp.Command: 1903 if self._match_text_seq("DATA"): 1904 local = self._match_text_seq("LOCAL") 1905 self._match_text_seq("INPATH") 1906 inpath = self._parse_string() 1907 overwrite = self._match(TokenType.OVERWRITE) 1908 self._match_pair(TokenType.INTO, TokenType.TABLE) 1909 1910 return self.expression( 1911 exp.LoadData, 1912 this=self._parse_table(schema=True), 1913 local=local, 1914 overwrite=overwrite, 1915 inpath=inpath, 1916 partition=self._parse_partition(), 1917 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1918 serde=self._match_text_seq("SERDE") and self._parse_string(), 1919 ) 
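# Anything other than LOAD DATA falls through to the generic command path below and is
# preserved verbatim as an exp.Command. A minimal sketch of the LOAD DATA branch,
# assuming the Hive dialect (the path and table name are illustrative):
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> load = sqlglot.parse_one("LOAD DATA INPATH '/tmp/x' INTO TABLE t", read="hive")
#   >>> isinstance(load, exp.LoadData)
#   True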
1920 return self._parse_as_command(self._prev) 1921 1922 def _parse_delete(self) -> exp.Delete: 1923 # This handles MySQL's "Multiple-Table Syntax" 1924 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1925 tables = None 1926 comments = self._prev_comments 1927 if not self._match(TokenType.FROM, advance=False): 1928 tables = self._parse_csv(self._parse_table) or None 1929 1930 returning = self._parse_returning() 1931 1932 return self.expression( 1933 exp.Delete, 1934 comments=comments, 1935 tables=tables, 1936 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1937 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1938 where=self._parse_where(), 1939 returning=returning or self._parse_returning(), 1940 limit=self._parse_limit(), 1941 ) 1942 1943 def _parse_update(self) -> exp.Update: 1944 comments = self._prev_comments 1945 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1946 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1947 returning = self._parse_returning() 1948 return self.expression( 1949 exp.Update, 1950 comments=comments, 1951 **{ # type: ignore 1952 "this": this, 1953 "expressions": expressions, 1954 "from": self._parse_from(joins=True), 1955 "where": self._parse_where(), 1956 "returning": returning or self._parse_returning(), 1957 "order": self._parse_order(), 1958 "limit": self._parse_limit(), 1959 }, 1960 ) 1961 1962 def _parse_uncache(self) -> exp.Uncache: 1963 if not self._match(TokenType.TABLE): 1964 self.raise_error("Expecting TABLE after UNCACHE") 1965 1966 return self.expression( 1967 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1968 ) 1969 1970 def _parse_cache(self) -> exp.Cache: 1971 lazy = self._match_text_seq("LAZY") 1972 self._match(TokenType.TABLE) 1973 table = self._parse_table(schema=True) 1974 1975 options = [] 1976 if self._match_text_seq("OPTIONS"): 1977 self._match_l_paren() 1978 k = self._parse_string() 1979 self._match(TokenType.EQ) 1980 v = self._parse_string() 1981 options = [k, v] 1982 self._match_r_paren() 1983 1984 self._match(TokenType.ALIAS) 1985 return self.expression( 1986 exp.Cache, 1987 this=table, 1988 lazy=lazy, 1989 options=options, 1990 expression=self._parse_select(nested=True), 1991 ) 1992 1993 def _parse_partition(self) -> t.Optional[exp.Partition]: 1994 if not self._match(TokenType.PARTITION): 1995 return None 1996 1997 return self.expression( 1998 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1999 ) 2000 2001 def _parse_value(self) -> exp.Tuple: 2002 if self._match(TokenType.L_PAREN): 2003 expressions = self._parse_csv(self._parse_conjunction) 2004 self._match_r_paren() 2005 return self.expression(exp.Tuple, expressions=expressions) 2006 2007 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
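# A sketch of that shape, assuming the default dialect (the literals and the alias are
# illustrative): each value becomes a one-element exp.Tuple, i.e. a one-column row.
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> values = sqlglot.parse_one("SELECT * FROM (VALUES 1, 2) AS v(x)").find(exp.Values)
#   >>> [row.sql() for row in values.expressions]
#   ['(1)', '(2)']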
2008 # https://prestodb.io/docs/current/sql/values.html 2009 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2010 2011 def _parse_projections(self) -> t.List[exp.Expression]: 2012 return self._parse_expressions() 2013 2014 def _parse_select( 2015 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2016 ) -> t.Optional[exp.Expression]: 2017 cte = self._parse_with() 2018 2019 if cte: 2020 this = self._parse_statement() 2021 2022 if not this: 2023 self.raise_error("Failed to parse any statement following CTE") 2024 return cte 2025 2026 if "with" in this.arg_types: 2027 this.set("with", cte) 2028 else: 2029 self.raise_error(f"{this.key} does not support CTE") 2030 this = cte 2031 2032 return this 2033 2034 # duckdb supports leading with FROM x 2035 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2036 2037 if self._match(TokenType.SELECT): 2038 comments = self._prev_comments 2039 2040 hint = self._parse_hint() 2041 all_ = self._match(TokenType.ALL) 2042 distinct = self._match_set(self.DISTINCT_TOKENS) 2043 2044 kind = ( 2045 self._match(TokenType.ALIAS) 2046 and self._match_texts(("STRUCT", "VALUE")) 2047 and self._prev.text 2048 ) 2049 2050 if distinct: 2051 distinct = self.expression( 2052 exp.Distinct, 2053 on=self._parse_value() if self._match(TokenType.ON) else None, 2054 ) 2055 2056 if all_ and distinct: 2057 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2058 2059 limit = self._parse_limit(top=True) 2060 projections = self._parse_projections() 2061 2062 this = self.expression( 2063 exp.Select, 2064 kind=kind, 2065 hint=hint, 2066 distinct=distinct, 2067 expressions=projections, 2068 limit=limit, 2069 ) 2070 this.comments = comments 2071 2072 into = self._parse_into() 2073 if into: 2074 this.set("into", into) 2075 2076 if not from_: 2077 from_ = self._parse_from() 2078 2079 if from_: 2080 this.set("from", from_) 2081 2082 this = self._parse_query_modifiers(this) 2083 elif (table or nested) and self._match(TokenType.L_PAREN): 2084 if self._match(TokenType.PIVOT): 2085 this = self._parse_simplified_pivot() 2086 elif self._match(TokenType.FROM): 2087 this = exp.select("*").from_( 2088 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2089 ) 2090 else: 2091 this = self._parse_table() if table else self._parse_select(nested=True) 2092 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2093 2094 self._match_r_paren() 2095 2096 # We return early here so that the UNION isn't attached to the subquery by the 2097 # following call to _parse_set_operations, but instead becomes the parent node 2098 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2099 elif self._match(TokenType.VALUES): 2100 this = self.expression( 2101 exp.Values, 2102 expressions=self._parse_csv(self._parse_value), 2103 alias=self._parse_table_alias(), 2104 ) 2105 elif from_: 2106 this = exp.select("*").from_(from_.this, copy=False) 2107 else: 2108 this = None 2109 2110 return self._parse_set_operations(this) 2111 2112 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2113 if not skip_with_token and not self._match(TokenType.WITH): 2114 return None 2115 2116 comments = self._prev_comments 2117 recursive = self._match(TokenType.RECURSIVE) 2118 2119 expressions = [] 2120 while True: 2121 expressions.append(self._parse_cte()) 2122 2123 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2124 break 2125 else: 2126 
self._match(TokenType.WITH) 2127 2128 return self.expression( 2129 exp.With, comments=comments, expressions=expressions, recursive=recursive 2130 ) 2131 2132 def _parse_cte(self) -> exp.CTE: 2133 alias = self._parse_table_alias() 2134 if not alias or not alias.this: 2135 self.raise_error("Expected CTE to have alias") 2136 2137 self._match(TokenType.ALIAS) 2138 return self.expression( 2139 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2140 ) 2141 2142 def _parse_table_alias( 2143 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2144 ) -> t.Optional[exp.TableAlias]: 2145 any_token = self._match(TokenType.ALIAS) 2146 alias = ( 2147 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2148 or self._parse_string_as_identifier() 2149 ) 2150 2151 index = self._index 2152 if self._match(TokenType.L_PAREN): 2153 columns = self._parse_csv(self._parse_function_parameter) 2154 self._match_r_paren() if columns else self._retreat(index) 2155 else: 2156 columns = None 2157 2158 if not alias and not columns: 2159 return None 2160 2161 return self.expression(exp.TableAlias, this=alias, columns=columns) 2162 2163 def _parse_subquery( 2164 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2165 ) -> t.Optional[exp.Subquery]: 2166 if not this: 2167 return None 2168 2169 return self.expression( 2170 exp.Subquery, 2171 this=this, 2172 pivots=self._parse_pivots(), 2173 alias=self._parse_table_alias() if parse_alias else None, 2174 ) 2175 2176 def _parse_query_modifiers( 2177 self, this: t.Optional[exp.Expression] 2178 ) -> t.Optional[exp.Expression]: 2179 if isinstance(this, self.MODIFIABLES): 2180 for join in iter(self._parse_join, None): 2181 this.append("joins", join) 2182 for lateral in iter(self._parse_lateral, None): 2183 this.append("laterals", lateral) 2184 2185 while True: 2186 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2187 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2188 key, expression = parser(self) 2189 2190 if expression: 2191 this.set(key, expression) 2192 if key == "limit": 2193 offset = expression.args.pop("offset", None) 2194 if offset: 2195 this.set("offset", exp.Offset(expression=offset)) 2196 continue 2197 break 2198 return this 2199 2200 def _parse_hint(self) -> t.Optional[exp.Hint]: 2201 if self._match(TokenType.HINT): 2202 hints = [] 2203 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2204 hints.extend(hint) 2205 2206 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2207 self.raise_error("Expected */ after HINT") 2208 2209 return self.expression(exp.Hint, expressions=hints) 2210 2211 return None 2212 2213 def _parse_into(self) -> t.Optional[exp.Into]: 2214 if not self._match(TokenType.INTO): 2215 return None 2216 2217 temp = self._match(TokenType.TEMPORARY) 2218 unlogged = self._match_text_seq("UNLOGGED") 2219 self._match(TokenType.TABLE) 2220 2221 return self.expression( 2222 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2223 ) 2224 2225 def _parse_from( 2226 self, joins: bool = False, skip_from_token: bool = False 2227 ) -> t.Optional[exp.From]: 2228 if not skip_from_token and not self._match(TokenType.FROM): 2229 return None 2230 2231 return self.expression( 2232 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2233 ) 2234 2235 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2236 if not self._match(TokenType.MATCH_RECOGNIZE): 2237 return None 2238 2239 
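# The clause body is consumed piece by piece below and collected into a single
# exp.MatchRecognize node. A minimal sketch, assuming the default dialect accepts the
# clause (the identifiers are illustrative):
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> sql = "SELECT * FROM t MATCH_RECOGNIZE (PARTITION BY a ORDER BY b PATTERN (x) DEFINE x AS c > 0)"
#   >>> sqlglot.parse_one(sql).find(exp.MatchRecognize) is not None
#   True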
self._match_l_paren() 2240 2241 partition = self._parse_partition_by() 2242 order = self._parse_order() 2243 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2244 2245 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2246 rows = exp.var("ONE ROW PER MATCH") 2247 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2248 text = "ALL ROWS PER MATCH" 2249 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2250 text += " SHOW EMPTY MATCHES" 2251 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2252 text += " OMIT EMPTY MATCHES" 2253 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2254 text += " WITH UNMATCHED ROWS" 2255 rows = exp.var(text) 2256 else: 2257 rows = None 2258 2259 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2260 text = "AFTER MATCH SKIP" 2261 if self._match_text_seq("PAST", "LAST", "ROW"): 2262 text += " PAST LAST ROW" 2263 elif self._match_text_seq("TO", "NEXT", "ROW"): 2264 text += " TO NEXT ROW" 2265 elif self._match_text_seq("TO", "FIRST"): 2266 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2267 elif self._match_text_seq("TO", "LAST"): 2268 text += f" TO LAST {self._advance_any().text}" # type: ignore 2269 after = exp.var(text) 2270 else: 2271 after = None 2272 2273 if self._match_text_seq("PATTERN"): 2274 self._match_l_paren() 2275 2276 if not self._curr: 2277 self.raise_error("Expecting )", self._curr) 2278 2279 paren = 1 2280 start = self._curr 2281 2282 while self._curr and paren > 0: 2283 if self._curr.token_type == TokenType.L_PAREN: 2284 paren += 1 2285 if self._curr.token_type == TokenType.R_PAREN: 2286 paren -= 1 2287 2288 end = self._prev 2289 self._advance() 2290 2291 if paren > 0: 2292 self.raise_error("Expecting )", self._curr) 2293 2294 pattern = exp.var(self._find_sql(start, end)) 2295 else: 2296 pattern = None 2297 2298 define = ( 2299 self._parse_csv( 2300 lambda: self.expression( 2301 exp.Alias, 2302 alias=self._parse_id_var(any_token=True), 2303 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2304 ) 2305 ) 2306 if self._match_text_seq("DEFINE") 2307 else None 2308 ) 2309 2310 self._match_r_paren() 2311 2312 return self.expression( 2313 exp.MatchRecognize, 2314 partition_by=partition, 2315 order=order, 2316 measures=measures, 2317 rows=rows, 2318 after=after, 2319 pattern=pattern, 2320 define=define, 2321 alias=self._parse_table_alias(), 2322 ) 2323 2324 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2325 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2326 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2327 2328 if outer_apply or cross_apply: 2329 this = self._parse_select(table=True) 2330 view = None 2331 outer = not cross_apply 2332 elif self._match(TokenType.LATERAL): 2333 this = self._parse_select(table=True) 2334 view = self._match(TokenType.VIEW) 2335 outer = self._match(TokenType.OUTER) 2336 else: 2337 return None 2338 2339 if not this: 2340 this = ( 2341 self._parse_unnest() 2342 or self._parse_function() 2343 or self._parse_id_var(any_token=False) 2344 ) 2345 2346 while self._match(TokenType.DOT): 2347 this = exp.Dot( 2348 this=this, 2349 expression=self._parse_function() or self._parse_id_var(any_token=False), 2350 ) 2351 2352 if view: 2353 table = self._parse_id_var(any_token=False) 2354 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2355 table_alias: t.Optional[exp.TableAlias] = self.expression( 2356 exp.TableAlias, this=table, columns=columns 2357 ) 2358 elif
isinstance(this, exp.Subquery) and this.alias: 2359 # Ensures parity between the Subquery's and the Lateral's "alias" args 2360 table_alias = this.args["alias"].copy() 2361 else: 2362 table_alias = self._parse_table_alias() 2363 2364 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2365 2366 def _parse_join_parts( 2367 self, 2368 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2369 return ( 2370 self._match_set(self.JOIN_METHODS) and self._prev, 2371 self._match_set(self.JOIN_SIDES) and self._prev, 2372 self._match_set(self.JOIN_KINDS) and self._prev, 2373 ) 2374 2375 def _parse_join( 2376 self, skip_join_token: bool = False, parse_bracket: bool = False 2377 ) -> t.Optional[exp.Join]: 2378 if self._match(TokenType.COMMA): 2379 return self.expression(exp.Join, this=self._parse_table()) 2380 2381 index = self._index 2382 method, side, kind = self._parse_join_parts() 2383 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2384 join = self._match(TokenType.JOIN) 2385 2386 if not skip_join_token and not join: 2387 self._retreat(index) 2388 kind = None 2389 method = None 2390 side = None 2391 2392 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2393 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2394 2395 if not skip_join_token and not join and not outer_apply and not cross_apply: 2396 return None 2397 2398 if outer_apply: 2399 side = Token(TokenType.LEFT, "LEFT") 2400 2401 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2402 2403 if method: 2404 kwargs["method"] = method.text 2405 if side: 2406 kwargs["side"] = side.text 2407 if kind: 2408 kwargs["kind"] = kind.text 2409 if hint: 2410 kwargs["hint"] = hint 2411 2412 if self._match(TokenType.ON): 2413 kwargs["on"] = self._parse_conjunction() 2414 elif self._match(TokenType.USING): 2415 kwargs["using"] = self._parse_wrapped_id_vars() 2416 elif not (kind and kind.token_type == TokenType.CROSS): 2417 index = self._index 2418 joins = self._parse_joins() 2419 2420 if joins and self._match(TokenType.ON): 2421 kwargs["on"] = self._parse_conjunction() 2422 elif joins and self._match(TokenType.USING): 2423 kwargs["using"] = self._parse_wrapped_id_vars() 2424 else: 2425 joins = None 2426 self._retreat(index) 2427 2428 kwargs["this"].set("joins", joins) 2429 2430 comments = [c for token in (method, side, kind) if token for c in token.comments] 2431 return self.expression(exp.Join, comments=comments, **kwargs) 2432 2433 def _parse_index( 2434 self, 2435 index: t.Optional[exp.Expression] = None, 2436 ) -> t.Optional[exp.Index]: 2437 if index: 2438 unique = None 2439 primary = None 2440 amp = None 2441 2442 self._match(TokenType.ON) 2443 self._match(TokenType.TABLE) # hive 2444 table = self._parse_table_parts(schema=True) 2445 else: 2446 unique = self._match(TokenType.UNIQUE) 2447 primary = self._match_text_seq("PRIMARY") 2448 amp = self._match_text_seq("AMP") 2449 2450 if not self._match(TokenType.INDEX): 2451 return None 2452 2453 index = self._parse_id_var() 2454 table = None 2455 2456 using = self._parse_field() if self._match(TokenType.USING) else None 2457 2458 if self._match(TokenType.L_PAREN, advance=False): 2459 columns = self._parse_wrapped_csv(self._parse_ordered) 2460 else: 2461 columns = None 2462 2463 return self.expression( 2464 exp.Index, 2465 this=index, 2466 table=table, 2467 using=using, 2468 columns=columns, 2469 unique=unique, 2470 primary=primary, 2471 amp=amp, 2472 
partition_by=self._parse_partition_by(), 2473 ) 2474 2475 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2476 hints: t.List[exp.Expression] = [] 2477 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2478 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2479 hints.append( 2480 self.expression( 2481 exp.WithTableHint, 2482 expressions=self._parse_csv( 2483 lambda: self._parse_function() or self._parse_var(any_token=True) 2484 ), 2485 ) 2486 ) 2487 self._match_r_paren() 2488 else: 2489 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2490 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2491 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2492 2493 self._match_texts({"INDEX", "KEY"}) 2494 if self._match(TokenType.FOR): 2495 hint.set("target", self._advance_any() and self._prev.text.upper()) 2496 2497 hint.set("expressions", self._parse_wrapped_id_vars()) 2498 hints.append(hint) 2499 2500 return hints or None 2501 2502 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2503 return ( 2504 (not schema and self._parse_function(optional_parens=False)) 2505 or self._parse_id_var(any_token=False) 2506 or self._parse_string_as_identifier() 2507 or self._parse_placeholder() 2508 ) 2509 2510 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2511 catalog = None 2512 db = None 2513 table = self._parse_table_part(schema=schema) 2514 2515 while self._match(TokenType.DOT): 2516 if catalog: 2517 # This allows nesting the table in arbitrarily many dot expressions if needed 2518 table = self.expression( 2519 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2520 ) 2521 else: 2522 catalog = db 2523 db = table 2524 table = self._parse_table_part(schema=schema) 2525 2526 if not table: 2527 self.raise_error(f"Expected table name but got {self._curr}") 2528 2529 return self.expression( 2530 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2531 ) 2532 2533 def _parse_table( 2534 self, 2535 schema: bool = False, 2536 joins: bool = False, 2537 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2538 parse_bracket: bool = False, 2539 ) -> t.Optional[exp.Expression]: 2540 lateral = self._parse_lateral() 2541 if lateral: 2542 return lateral 2543 2544 unnest = self._parse_unnest() 2545 if unnest: 2546 return unnest 2547 2548 values = self._parse_derived_table_values() 2549 if values: 2550 return values 2551 2552 subquery = self._parse_select(table=True) 2553 if subquery: 2554 if not subquery.args.get("pivots"): 2555 subquery.set("pivots", self._parse_pivots()) 2556 return subquery 2557 2558 bracket = parse_bracket and self._parse_bracket(None) 2559 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2560 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2561 2562 if schema: 2563 return self._parse_schema(this=this) 2564 2565 version = self._parse_version() 2566 2567 if version: 2568 this.set("version", version) 2569 2570 if self.ALIAS_POST_TABLESAMPLE: 2571 table_sample = self._parse_table_sample() 2572 2573 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2574 if alias: 2575 this.set("alias", alias) 2576 2577 this.set("hints", self._parse_table_hints()) 2578 2579 if not this.args.get("pivots"): 2580 this.set("pivots", self._parse_pivots()) 2581 2582 if not self.ALIAS_POST_TABLESAMPLE: 2583 table_sample = self._parse_table_sample() 2584 2585 if 
table_sample: 2586 table_sample.set("this", this) 2587 this = table_sample 2588 2589 if joins: 2590 for join in iter(self._parse_join, None): 2591 this.append("joins", join) 2592 2593 return this 2594 2595 def _parse_version(self) -> t.Optional[exp.Version]: 2596 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2597 this = "TIMESTAMP" 2598 elif self._match(TokenType.VERSION_SNAPSHOT): 2599 this = "VERSION" 2600 else: 2601 return None 2602 2603 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2604 kind = self._prev.text.upper() 2605 start = self._parse_bitwise() 2606 self._match_texts(("TO", "AND")) 2607 end = self._parse_bitwise() 2608 expression: t.Optional[exp.Expression] = self.expression( 2609 exp.Tuple, expressions=[start, end] 2610 ) 2611 elif self._match_text_seq("CONTAINED", "IN"): 2612 kind = "CONTAINED IN" 2613 expression = self.expression( 2614 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2615 ) 2616 elif self._match(TokenType.ALL): 2617 kind = "ALL" 2618 expression = None 2619 else: 2620 self._match_text_seq("AS", "OF") 2621 kind = "AS OF" 2622 expression = self._parse_type() 2623 2624 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2625 2626 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2627 if not self._match(TokenType.UNNEST): 2628 return None 2629 2630 expressions = self._parse_wrapped_csv(self._parse_type) 2631 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2632 2633 alias = self._parse_table_alias() if with_alias else None 2634 2635 if alias and self.UNNEST_COLUMN_ONLY: 2636 if alias.args.get("columns"): 2637 self.raise_error("Unexpected extra column alias in unnest.") 2638 2639 alias.set("columns", [alias.this]) 2640 alias.set("this", None) 2641 2642 offset = None 2643 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2644 self._match(TokenType.ALIAS) 2645 offset = self._parse_id_var() or exp.to_identifier("offset") 2646 2647 return self.expression( 2648 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2649 ) 2650 2651 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2652 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2653 if not is_derived and not self._match(TokenType.VALUES): 2654 return None 2655 2656 expressions = self._parse_csv(self._parse_value) 2657 alias = self._parse_table_alias() 2658 2659 if is_derived: 2660 self._match_r_paren() 2661 2662 return self.expression( 2663 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2664 ) 2665 2666 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2667 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2668 as_modifier and self._match_text_seq("USING", "SAMPLE") 2669 ): 2670 return None 2671 2672 bucket_numerator = None 2673 bucket_denominator = None 2674 bucket_field = None 2675 percent = None 2676 rows = None 2677 size = None 2678 seed = None 2679 2680 kind = ( 2681 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2682 ) 2683 method = self._parse_var(tokens=(TokenType.ROW,)) 2684 2685 self._match(TokenType.L_PAREN) 2686 2687 if self.TABLESAMPLE_CSV: 2688 num = None 2689 expressions = self._parse_csv(self._parse_primary) 2690 else: 2691 expressions = None 2692 num = self._parse_number() 2693 2694 if self._match_text_seq("BUCKET"): 2695 bucket_numerator = self._parse_number() 2696 self._match_text_seq("OUT", "OF") 2697 
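# At this point BUCKET <numerator> OUT OF has been consumed; the denominator and the
# optional ON <field> follow. A sketch of the resulting args, assuming the Hive dialect
# (the table and column names are illustrative):
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> sql = "SELECT * FROM t TABLESAMPLE (BUCKET 1 OUT OF 4 ON x)"
#   >>> sample = sqlglot.parse_one(sql, read="hive").find(exp.TableSample)
#   >>> sample.args["bucket_numerator"].sql(), sample.args["bucket_denominator"].sql()
#   ('1', '4')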
bucket_denominator = self._parse_number() 2698 self._match(TokenType.ON) 2699 bucket_field = self._parse_field() 2700 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2701 percent = num 2702 elif self._match(TokenType.ROWS): 2703 rows = num 2704 elif num: 2705 size = num 2706 2707 self._match(TokenType.R_PAREN) 2708 2709 if self._match(TokenType.L_PAREN): 2710 method = self._parse_var() 2711 seed = self._match(TokenType.COMMA) and self._parse_number() 2712 self._match_r_paren() 2713 elif self._match_texts(("SEED", "REPEATABLE")): 2714 seed = self._parse_wrapped(self._parse_number) 2715 2716 return self.expression( 2717 exp.TableSample, 2718 expressions=expressions, 2719 method=method, 2720 bucket_numerator=bucket_numerator, 2721 bucket_denominator=bucket_denominator, 2722 bucket_field=bucket_field, 2723 percent=percent, 2724 rows=rows, 2725 size=size, 2726 seed=seed, 2727 kind=kind, 2728 ) 2729 2730 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2731 return list(iter(self._parse_pivot, None)) or None 2732 2733 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2734 return list(iter(self._parse_join, None)) or None 2735 2736 # https://duckdb.org/docs/sql/statements/pivot 2737 def _parse_simplified_pivot(self) -> exp.Pivot: 2738 def _parse_on() -> t.Optional[exp.Expression]: 2739 this = self._parse_bitwise() 2740 return self._parse_in(this) if self._match(TokenType.IN) else this 2741 2742 this = self._parse_table() 2743 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2744 using = self._match(TokenType.USING) and self._parse_csv( 2745 lambda: self._parse_alias(self._parse_function()) 2746 ) 2747 group = self._parse_group() 2748 return self.expression( 2749 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2750 ) 2751 2752 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2753 index = self._index 2754 include_nulls = None 2755 2756 if self._match(TokenType.PIVOT): 2757 unpivot = False 2758 elif self._match(TokenType.UNPIVOT): 2759 unpivot = True 2760 2761 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2762 if self._match_text_seq("INCLUDE", "NULLS"): 2763 include_nulls = True 2764 elif self._match_text_seq("EXCLUDE", "NULLS"): 2765 include_nulls = False 2766 else: 2767 return None 2768 2769 expressions = [] 2770 field = None 2771 2772 if not self._match(TokenType.L_PAREN): 2773 self._retreat(index) 2774 return None 2775 2776 if unpivot: 2777 expressions = self._parse_csv(self._parse_column) 2778 else: 2779 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2780 2781 if not expressions: 2782 self.raise_error("Failed to parse PIVOT's aggregation list") 2783 2784 if not self._match(TokenType.FOR): 2785 self.raise_error("Expecting FOR") 2786 2787 value = self._parse_column() 2788 2789 if not self._match(TokenType.IN): 2790 self.raise_error("Expecting IN") 2791 2792 field = self._parse_in(value, alias=True) 2793 2794 self._match_r_paren() 2795 2796 pivot = self.expression( 2797 exp.Pivot, 2798 expressions=expressions, 2799 field=field, 2800 unpivot=unpivot, 2801 include_nulls=include_nulls, 2802 ) 2803 2804 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2805 pivot.set("alias", self._parse_table_alias()) 2806 2807 if not unpivot: 2808 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2809 2810 columns: t.List[exp.Expression] = [] 2811 for fld in
pivot.args["field"].expressions: 2812 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2813 for name in names: 2814 if self.PREFIXED_PIVOT_COLUMNS: 2815 name = f"{name}_{field_name}" if name else field_name 2816 else: 2817 name = f"{field_name}_{name}" if name else field_name 2818 2819 columns.append(exp.to_identifier(name)) 2820 2821 pivot.set("columns", columns) 2822 2823 return pivot 2824 2825 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2826 return [agg.alias for agg in aggregations] 2827 2828 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2829 if not skip_where_token and not self._match(TokenType.WHERE): 2830 return None 2831 2832 return self.expression( 2833 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2834 ) 2835 2836 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2837 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2838 return None 2839 2840 elements = defaultdict(list) 2841 2842 if self._match(TokenType.ALL): 2843 return self.expression(exp.Group, all=True) 2844 2845 while True: 2846 expressions = self._parse_csv(self._parse_conjunction) 2847 if expressions: 2848 elements["expressions"].extend(expressions) 2849 2850 grouping_sets = self._parse_grouping_sets() 2851 if grouping_sets: 2852 elements["grouping_sets"].extend(grouping_sets) 2853 2854 rollup = None 2855 cube = None 2856 totals = None 2857 2858 with_ = self._match(TokenType.WITH) 2859 if self._match(TokenType.ROLLUP): 2860 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2861 elements["rollup"].extend(ensure_list(rollup)) 2862 2863 if self._match(TokenType.CUBE): 2864 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2865 elements["cube"].extend(ensure_list(cube)) 2866 2867 if self._match_text_seq("TOTALS"): 2868 totals = True 2869 elements["totals"] = True # type: ignore 2870 2871 if not (grouping_sets or rollup or cube or totals): 2872 break 2873 2874 return self.expression(exp.Group, **elements) # type: ignore 2875 2876 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2877 if not self._match(TokenType.GROUPING_SETS): 2878 return None 2879 2880 return self._parse_wrapped_csv(self._parse_grouping_set) 2881 2882 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2883 if self._match(TokenType.L_PAREN): 2884 grouping_set = self._parse_csv(self._parse_column) 2885 self._match_r_paren() 2886 return self.expression(exp.Tuple, expressions=grouping_set) 2887 2888 return self._parse_column() 2889 2890 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2891 if not skip_having_token and not self._match(TokenType.HAVING): 2892 return None 2893 return self.expression(exp.Having, this=self._parse_conjunction()) 2894 2895 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2896 if not self._match(TokenType.QUALIFY): 2897 return None 2898 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2899 2900 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2901 if skip_start_token: 2902 start = None 2903 elif self._match(TokenType.START_WITH): 2904 start = self._parse_conjunction() 2905 else: 2906 return None 2907 2908 self._match(TokenType.CONNECT_BY) 2909 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2910 exp.Prior, this=self._parse_bitwise() 2911 ) 2912 connect = self._parse_conjunction() 2913 
self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2914 return self.expression(exp.Connect, start=start, connect=connect) 2915 2916 def _parse_order( 2917 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2918 ) -> t.Optional[exp.Expression]: 2919 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2920 return this 2921 2922 return self.expression( 2923 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2924 ) 2925 2926 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2927 if not self._match(token): 2928 return None 2929 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2930 2931 def _parse_ordered(self) -> exp.Ordered: 2932 this = self._parse_conjunction() 2933 self._match(TokenType.ASC) 2934 2935 is_desc = self._match(TokenType.DESC) 2936 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2937 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2938 desc = is_desc or False 2939 asc = not desc 2940 nulls_first = is_nulls_first or False 2941 explicitly_null_ordered = is_nulls_first or is_nulls_last 2942 2943 if ( 2944 not explicitly_null_ordered 2945 and ( 2946 (asc and self.NULL_ORDERING == "nulls_are_small") 2947 or (desc and self.NULL_ORDERING != "nulls_are_small") 2948 ) 2949 and self.NULL_ORDERING != "nulls_are_last" 2950 ): 2951 nulls_first = True 2952 2953 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2954 2955 def _parse_limit( 2956 self, this: t.Optional[exp.Expression] = None, top: bool = False 2957 ) -> t.Optional[exp.Expression]: 2958 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2959 comments = self._prev_comments 2960 if top: 2961 limit_paren = self._match(TokenType.L_PAREN) 2962 expression = self._parse_number() 2963 2964 if limit_paren: 2965 self._match_r_paren() 2966 else: 2967 expression = self._parse_term() 2968 2969 if self._match(TokenType.COMMA): 2970 offset = expression 2971 expression = self._parse_term() 2972 else: 2973 offset = None 2974 2975 limit_exp = self.expression( 2976 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2977 ) 2978 2979 return limit_exp 2980 2981 if self._match(TokenType.FETCH): 2982 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2983 direction = self._prev.text if direction else "FIRST" 2984 2985 count = self._parse_number() 2986 percent = self._match(TokenType.PERCENT) 2987 2988 self._match_set((TokenType.ROW, TokenType.ROWS)) 2989 2990 only = self._match_text_seq("ONLY") 2991 with_ties = self._match_text_seq("WITH", "TIES") 2992 2993 if only and with_ties: 2994 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2995 2996 return self.expression( 2997 exp.Fetch, 2998 direction=direction, 2999 count=count, 3000 percent=percent, 3001 with_ties=with_ties, 3002 ) 3003 3004 return this 3005 3006 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3007 if not self._match(TokenType.OFFSET): 3008 return this 3009 3010 count = self._parse_term() 3011 self._match_set((TokenType.ROW, TokenType.ROWS)) 3012 return self.expression(exp.Offset, this=this, expression=count) 3013 3014 def _parse_locks(self) -> t.List[exp.Lock]: 3015 locks = [] 3016 while True: 3017 if self._match_text_seq("FOR", "UPDATE"): 3018 update = True 3019 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3020 "LOCK", "IN", "SHARE", "MODE" 3021 ): 3022 update = False 3023 else: 3024 
break 3025 3026 expressions = None 3027 if self._match_text_seq("OF"): 3028 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3029 3030 wait: t.Optional[bool | exp.Expression] = None 3031 if self._match_text_seq("NOWAIT"): 3032 wait = True 3033 elif self._match_text_seq("WAIT"): 3034 wait = self._parse_primary() 3035 elif self._match_text_seq("SKIP", "LOCKED"): 3036 wait = False 3037 3038 locks.append( 3039 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3040 ) 3041 3042 return locks 3043 3044 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3045 if not self._match_set(self.SET_OPERATIONS): 3046 return this 3047 3048 token_type = self._prev.token_type 3049 3050 if token_type == TokenType.UNION: 3051 expression = exp.Union 3052 elif token_type == TokenType.EXCEPT: 3053 expression = exp.Except 3054 else: 3055 expression = exp.Intersect 3056 3057 return self.expression( 3058 expression, 3059 this=this, 3060 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3061 by_name=self._match_text_seq("BY", "NAME"), 3062 expression=self._parse_set_operations(self._parse_select(nested=True)), 3063 ) 3064 3065 def _parse_expression(self) -> t.Optional[exp.Expression]: 3066 return self._parse_alias(self._parse_conjunction()) 3067 3068 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3069 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3070 3071 def _parse_equality(self) -> t.Optional[exp.Expression]: 3072 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3073 3074 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3075 return self._parse_tokens(self._parse_range, self.COMPARISON) 3076 3077 def _parse_range(self) -> t.Optional[exp.Expression]: 3078 this = self._parse_bitwise() 3079 negate = self._match(TokenType.NOT) 3080 3081 if self._match_set(self.RANGE_PARSERS): 3082 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3083 if not expression: 3084 return this 3085 3086 this = expression 3087 elif self._match(TokenType.ISNULL): 3088 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3089 3090 # Postgres supports ISNULL and NOTNULL for conditions. 
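# A sketch of how both spellings are normalized, assuming the default dialect: ISNULL
# becomes an exp.Is node, NOTNULL the same node wrapped in exp.Not.
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> isinstance(sqlglot.parse_one("SELECT a ISNULL").expressions[0], exp.Is)
#   True
#   >>> isinstance(sqlglot.parse_one("SELECT a NOTNULL").expressions[0], exp.Not)
#   True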
3091 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3092 if self._match(TokenType.NOTNULL): 3093 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3094 this = self.expression(exp.Not, this=this) 3095 3096 if negate: 3097 this = self.expression(exp.Not, this=this) 3098 3099 if self._match(TokenType.IS): 3100 this = self._parse_is(this) 3101 3102 return this 3103 3104 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3105 index = self._index - 1 3106 negate = self._match(TokenType.NOT) 3107 3108 if self._match_text_seq("DISTINCT", "FROM"): 3109 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3110 return self.expression(klass, this=this, expression=self._parse_expression()) 3111 3112 expression = self._parse_null() or self._parse_boolean() 3113 if not expression: 3114 self._retreat(index) 3115 return None 3116 3117 this = self.expression(exp.Is, this=this, expression=expression) 3118 return self.expression(exp.Not, this=this) if negate else this 3119 3120 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3121 unnest = self._parse_unnest(with_alias=False) 3122 if unnest: 3123 this = self.expression(exp.In, this=this, unnest=unnest) 3124 elif self._match(TokenType.L_PAREN): 3125 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3126 3127 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3128 this = self.expression(exp.In, this=this, query=expressions[0]) 3129 else: 3130 this = self.expression(exp.In, this=this, expressions=expressions) 3131 3132 self._match_r_paren(this) 3133 else: 3134 this = self.expression(exp.In, this=this, field=self._parse_field()) 3135 3136 return this 3137 3138 def _parse_between(self, this: exp.Expression) -> exp.Between: 3139 low = self._parse_bitwise() 3140 self._match(TokenType.AND) 3141 high = self._parse_bitwise() 3142 return self.expression(exp.Between, this=this, low=low, high=high) 3143 3144 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3145 if not self._match(TokenType.ESCAPE): 3146 return this 3147 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3148 3149 def _parse_interval(self) -> t.Optional[exp.Interval]: 3150 index = self._index 3151 3152 if not self._match(TokenType.INTERVAL): 3153 return None 3154 3155 if self._match(TokenType.STRING, advance=False): 3156 this = self._parse_primary() 3157 else: 3158 this = self._parse_term() 3159 3160 if not this: 3161 self._retreat(index) 3162 return None 3163 3164 unit = self._parse_function() or self._parse_var(any_token=True) 3165 3166 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3167 # each INTERVAL expression into this canonical form so it's easy to transpile 3168 if this and this.is_number: 3169 this = exp.Literal.string(this.name) 3170 elif this and this.is_string: 3171 parts = this.name.split() 3172 3173 if len(parts) == 2: 3174 if unit: 3175 # This is not actually a unit, it's something else (e.g. 
a "window side") 3176 unit = None 3177 self._retreat(self._index - 1) 3178 3179 this = exp.Literal.string(parts[0]) 3180 unit = self.expression(exp.Var, this=parts[1]) 3181 3182 return self.expression(exp.Interval, this=this, unit=unit) 3183 3184 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3185 this = self._parse_term() 3186 3187 while True: 3188 if self._match_set(self.BITWISE): 3189 this = self.expression( 3190 self.BITWISE[self._prev.token_type], 3191 this=this, 3192 expression=self._parse_term(), 3193 ) 3194 elif self._match(TokenType.DQMARK): 3195 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3196 elif self._match_pair(TokenType.LT, TokenType.LT): 3197 this = self.expression( 3198 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3199 ) 3200 elif self._match_pair(TokenType.GT, TokenType.GT): 3201 this = self.expression( 3202 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3203 ) 3204 else: 3205 break 3206 3207 return this 3208 3209 def _parse_term(self) -> t.Optional[exp.Expression]: 3210 return self._parse_tokens(self._parse_factor, self.TERM) 3211 3212 def _parse_factor(self) -> t.Optional[exp.Expression]: 3213 return self._parse_tokens(self._parse_unary, self.FACTOR) 3214 3215 def _parse_unary(self) -> t.Optional[exp.Expression]: 3216 if self._match_set(self.UNARY_PARSERS): 3217 return self.UNARY_PARSERS[self._prev.token_type](self) 3218 return self._parse_at_time_zone(self._parse_type()) 3219 3220 def _parse_type(self) -> t.Optional[exp.Expression]: 3221 interval = self._parse_interval() 3222 if interval: 3223 return interval 3224 3225 index = self._index 3226 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3227 this = self._parse_column() 3228 3229 if data_type: 3230 if isinstance(this, exp.Literal): 3231 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3232 if parser: 3233 return parser(self, this, data_type) 3234 return self.expression(exp.Cast, this=this, to=data_type) 3235 if not data_type.expressions: 3236 self._retreat(index) 3237 return self._parse_column() 3238 return self._parse_column_ops(data_type) 3239 3240 return this 3241 3242 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3243 this = self._parse_type() 3244 if not this: 3245 return None 3246 3247 return self.expression( 3248 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3249 ) 3250 3251 def _parse_types( 3252 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3253 ) -> t.Optional[exp.Expression]: 3254 index = self._index 3255 3256 prefix = self._match_text_seq("SYSUDTLIB", ".") 3257 3258 if not self._match_set(self.TYPE_TOKENS): 3259 identifier = allow_identifiers and self._parse_id_var( 3260 any_token=False, tokens=(TokenType.VAR,) 3261 ) 3262 3263 if identifier: 3264 tokens = self._tokenizer.tokenize(identifier.name) 3265 3266 if len(tokens) != 1: 3267 self.raise_error("Unexpected identifier", self._prev) 3268 3269 if tokens[0].token_type in self.TYPE_TOKENS: 3270 self._prev = tokens[0] 3271 elif self.SUPPORTS_USER_DEFINED_TYPES: 3272 return identifier 3273 else: 3274 return None 3275 else: 3276 return None 3277 3278 type_token = self._prev.token_type 3279 3280 if type_token == TokenType.PSEUDO_TYPE: 3281 return self.expression(exp.PseudoType, this=self._prev.text) 3282 3283 if type_token == TokenType.OBJECT_IDENTIFIER: 3284 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3285 3286 nested = type_token in 
self.NESTED_TYPE_TOKENS 3287 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3288 expressions = None 3289 maybe_func = False 3290 3291 if self._match(TokenType.L_PAREN): 3292 if is_struct: 3293 expressions = self._parse_csv(self._parse_struct_types) 3294 elif nested: 3295 expressions = self._parse_csv( 3296 lambda: self._parse_types( 3297 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3298 ) 3299 ) 3300 elif type_token in self.ENUM_TYPE_TOKENS: 3301 expressions = self._parse_csv(self._parse_equality) 3302 else: 3303 expressions = self._parse_csv(self._parse_type_size) 3304 3305 if not expressions or not self._match(TokenType.R_PAREN): 3306 self._retreat(index) 3307 return None 3308 3309 maybe_func = True 3310 3311 this: t.Optional[exp.Expression] = None 3312 values: t.Optional[t.List[exp.Expression]] = None 3313 3314 if nested and self._match(TokenType.LT): 3315 if is_struct: 3316 expressions = self._parse_csv(self._parse_struct_types) 3317 else: 3318 expressions = self._parse_csv( 3319 lambda: self._parse_types( 3320 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3321 ) 3322 ) 3323 3324 if not self._match(TokenType.GT): 3325 self.raise_error("Expecting >") 3326 3327 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3328 values = self._parse_csv(self._parse_conjunction) 3329 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3330 3331 if type_token in self.TIMESTAMPS: 3332 if self._match_text_seq("WITH", "TIME", "ZONE"): 3333 maybe_func = False 3334 tz_type = ( 3335 exp.DataType.Type.TIMETZ 3336 if type_token in self.TIMES 3337 else exp.DataType.Type.TIMESTAMPTZ 3338 ) 3339 this = exp.DataType(this=tz_type, expressions=expressions) 3340 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3341 maybe_func = False 3342 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3343 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3344 maybe_func = False 3345 elif type_token == TokenType.INTERVAL: 3346 unit = self._parse_var() 3347 3348 if self._match_text_seq("TO"): 3349 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3350 else: 3351 span = None 3352 3353 if span or not unit: 3354 this = self.expression( 3355 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3356 ) 3357 else: 3358 this = self.expression(exp.Interval, unit=unit) 3359 3360 if maybe_func and check_func: 3361 index2 = self._index 3362 peek = self._parse_string() 3363 3364 if not peek: 3365 self._retreat(index) 3366 return None 3367 3368 self._retreat(index2) 3369 3370 if not this: 3371 if self._match_text_seq("UNSIGNED"): 3372 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3373 if not unsigned_type_token: 3374 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3375 3376 type_token = unsigned_type_token or type_token 3377 3378 this = exp.DataType( 3379 this=exp.DataType.Type[type_token.value], 3380 expressions=expressions, 3381 nested=nested, 3382 values=values, 3383 prefix=prefix, 3384 ) 3385 3386 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3387 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3388 3389 return this 3390 3391 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3392 this = self._parse_type() or self._parse_id_var() 3393 self._match(TokenType.COLON) 3394 return self._parse_column_def(this) 3395 3396 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> 
t.Optional[exp.Expression]: 3397 if not self._match_text_seq("AT", "TIME", "ZONE"): 3398 return this 3399 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3400 3401 def _parse_column(self) -> t.Optional[exp.Expression]: 3402 this = self._parse_field() 3403 if isinstance(this, exp.Identifier): 3404 this = self.expression(exp.Column, this=this) 3405 elif not this: 3406 return self._parse_bracket(this) 3407 return self._parse_column_ops(this) 3408 3409 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3410 this = self._parse_bracket(this) 3411 3412 while self._match_set(self.COLUMN_OPERATORS): 3413 op_token = self._prev.token_type 3414 op = self.COLUMN_OPERATORS.get(op_token) 3415 3416 if op_token == TokenType.DCOLON: 3417 field = self._parse_types() 3418 if not field: 3419 self.raise_error("Expected type") 3420 elif op and self._curr: 3421 self._advance() 3422 value = self._prev.text 3423 field = ( 3424 exp.Literal.number(value) 3425 if self._prev.token_type == TokenType.NUMBER 3426 else exp.Literal.string(value) 3427 ) 3428 else: 3429 field = self._parse_field(anonymous_func=True, any_token=True) 3430 3431 if isinstance(field, exp.Func): 3432 # bigquery allows function calls like x.y.count(...) 3433 # SAFE.SUBSTR(...) 3434 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3435 this = self._replace_columns_with_dots(this) 3436 3437 if op: 3438 this = op(self, this, field) 3439 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3440 this = self.expression( 3441 exp.Column, 3442 this=field, 3443 table=this.this, 3444 db=this.args.get("table"), 3445 catalog=this.args.get("db"), 3446 ) 3447 else: 3448 this = self.expression(exp.Dot, this=this, expression=field) 3449 this = self._parse_bracket(this) 3450 return this 3451 3452 def _parse_primary(self) -> t.Optional[exp.Expression]: 3453 if self._match_set(self.PRIMARY_PARSERS): 3454 token_type = self._prev.token_type 3455 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3456 3457 if token_type == TokenType.STRING: 3458 expressions = [primary] 3459 while self._match(TokenType.STRING): 3460 expressions.append(exp.Literal.string(self._prev.text)) 3461 3462 if len(expressions) > 1: 3463 return self.expression(exp.Concat, expressions=expressions) 3464 3465 return primary 3466 3467 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3468 return exp.Literal.number(f"0.{self._prev.text}") 3469 3470 if self._match(TokenType.L_PAREN): 3471 comments = self._prev_comments 3472 query = self._parse_select() 3473 3474 if query: 3475 expressions = [query] 3476 else: 3477 expressions = self._parse_expressions() 3478 3479 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3480 3481 if isinstance(this, exp.Subqueryable): 3482 this = self._parse_set_operations( 3483 self._parse_subquery(this=this, parse_alias=False) 3484 ) 3485 elif len(expressions) > 1: 3486 this = self.expression(exp.Tuple, expressions=expressions) 3487 else: 3488 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3489 3490 if this: 3491 this.add_comments(comments) 3492 3493 self._match_r_paren(expression=this) 3494 return this 3495 3496 return None 3497 3498 def _parse_field( 3499 self, 3500 any_token: bool = False, 3501 tokens: t.Optional[t.Collection[TokenType]] = None, 3502 anonymous_func: bool = False, 3503 ) -> t.Optional[exp.Expression]: 3504 return ( 3505 self._parse_primary() 3506 or 
self._parse_function(anonymous=anonymous_func) 3507 or self._parse_id_var(any_token=any_token, tokens=tokens) 3508 ) 3509 3510 def _parse_function( 3511 self, 3512 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3513 anonymous: bool = False, 3514 optional_parens: bool = True, 3515 ) -> t.Optional[exp.Expression]: 3516 if not self._curr: 3517 return None 3518 3519 token_type = self._curr.token_type 3520 this = self._curr.text 3521 upper = this.upper() 3522 3523 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3524 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3525 self._advance() 3526 return parser(self) 3527 3528 if not self._next or self._next.token_type != TokenType.L_PAREN: 3529 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3530 self._advance() 3531 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3532 3533 return None 3534 3535 if token_type not in self.FUNC_TOKENS: 3536 return None 3537 3538 self._advance(2) 3539 3540 parser = self.FUNCTION_PARSERS.get(upper) 3541 if parser and not anonymous: 3542 this = parser(self) 3543 else: 3544 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3545 3546 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3547 this = self.expression(subquery_predicate, this=self._parse_select()) 3548 self._match_r_paren() 3549 return this 3550 3551 if functions is None: 3552 functions = self.FUNCTIONS 3553 3554 function = functions.get(upper) 3555 3556 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3557 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3558 3559 if function and not anonymous: 3560 func = self.validate_expression(function(args), args) 3561 if not self.NORMALIZE_FUNCTIONS: 3562 func.meta["name"] = this 3563 this = func 3564 else: 3565 this = self.expression(exp.Anonymous, this=this, expressions=args) 3566 3567 self._match_r_paren(this) 3568 return self._parse_window(this) 3569 3570 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3571 return self._parse_column_def(self._parse_id_var()) 3572 3573 def _parse_user_defined_function( 3574 self, kind: t.Optional[TokenType] = None 3575 ) -> t.Optional[exp.Expression]: 3576 this = self._parse_id_var() 3577 3578 while self._match(TokenType.DOT): 3579 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3580 3581 if not self._match(TokenType.L_PAREN): 3582 return this 3583 3584 expressions = self._parse_csv(self._parse_function_parameter) 3585 self._match_r_paren() 3586 return self.expression( 3587 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3588 ) 3589 3590 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3591 literal = self._parse_primary() 3592 if literal: 3593 return self.expression(exp.Introducer, this=token.text, expression=literal) 3594 3595 return self.expression(exp.Identifier, this=token.text) 3596 3597 def _parse_session_parameter(self) -> exp.SessionParameter: 3598 kind = None 3599 this = self._parse_id_var() or self._parse_primary() 3600 3601 if this and self._match(TokenType.DOT): 3602 kind = this.name 3603 this = self._parse_var() or self._parse_primary() 3604 3605 return self.expression(exp.SessionParameter, this=this, kind=kind) 3606 3607 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3608 index = self._index 3609 3610 if self._match(TokenType.L_PAREN): 3611 expressions = t.cast( 3612 t.List[t.Optional[exp.Expression]], 
self._parse_csv(self._parse_id_var) 3613 ) 3614 3615 if not self._match(TokenType.R_PAREN): 3616 self._retreat(index) 3617 else: 3618 expressions = [self._parse_id_var()] 3619 3620 if self._match_set(self.LAMBDAS): 3621 return self.LAMBDAS[self._prev.token_type](self, expressions) 3622 3623 self._retreat(index) 3624 3625 this: t.Optional[exp.Expression] 3626 3627 if self._match(TokenType.DISTINCT): 3628 this = self.expression( 3629 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3630 ) 3631 else: 3632 this = self._parse_select_or_expression(alias=alias) 3633 3634 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3635 3636 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3637 index = self._index 3638 3639 if not self.errors: 3640 try: 3641 if self._parse_select(nested=True): 3642 return this 3643 except ParseError: 3644 pass 3645 finally: 3646 self.errors.clear() 3647 self._retreat(index) 3648 3649 if not self._match(TokenType.L_PAREN): 3650 return this 3651 3652 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3653 3654 self._match_r_paren() 3655 return self.expression(exp.Schema, this=this, expressions=args) 3656 3657 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3658 return self._parse_column_def(self._parse_field(any_token=True)) 3659 3660 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3661 # column defs are not really columns, they're identifiers 3662 if isinstance(this, exp.Column): 3663 this = this.this 3664 3665 kind = self._parse_types(schema=True) 3666 3667 if self._match_text_seq("FOR", "ORDINALITY"): 3668 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3669 3670 constraints: t.List[exp.Expression] = [] 3671 3672 if not kind and self._match(TokenType.ALIAS): 3673 constraints.append( 3674 self.expression( 3675 exp.ComputedColumnConstraint, 3676 this=self._parse_conjunction(), 3677 persisted=self._match_text_seq("PERSISTED"), 3678 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3679 ) 3680 ) 3681 3682 while True: 3683 constraint = self._parse_column_constraint() 3684 if not constraint: 3685 break 3686 constraints.append(constraint) 3687 3688 if not kind and not constraints: 3689 return this 3690 3691 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3692 3693 def _parse_auto_increment( 3694 self, 3695 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3696 start = None 3697 increment = None 3698 3699 if self._match(TokenType.L_PAREN, advance=False): 3700 args = self._parse_wrapped_csv(self._parse_bitwise) 3701 start = seq_get(args, 0) 3702 increment = seq_get(args, 1) 3703 elif self._match_text_seq("START"): 3704 start = self._parse_bitwise() 3705 self._match_text_seq("INCREMENT") 3706 increment = self._parse_bitwise() 3707 3708 if start and increment: 3709 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3710 3711 return exp.AutoIncrementColumnConstraint() 3712 3713 def _parse_compress(self) -> exp.CompressColumnConstraint: 3714 if self._match(TokenType.L_PAREN, advance=False): 3715 return self.expression( 3716 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3717 ) 3718 3719 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3720 3721 def _parse_generated_as_identity(self) -> 
exp.GeneratedAsIdentityColumnConstraint: 3722 if self._match_text_seq("BY", "DEFAULT"): 3723 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3724 this = self.expression( 3725 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3726 ) 3727 else: 3728 self._match_text_seq("ALWAYS") 3729 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3730 3731 self._match(TokenType.ALIAS) 3732 identity = self._match_text_seq("IDENTITY") 3733 3734 if self._match(TokenType.L_PAREN): 3735 if self._match(TokenType.START_WITH): 3736 this.set("start", self._parse_bitwise()) 3737 if self._match_text_seq("INCREMENT", "BY"): 3738 this.set("increment", self._parse_bitwise()) 3739 if self._match_text_seq("MINVALUE"): 3740 this.set("minvalue", self._parse_bitwise()) 3741 if self._match_text_seq("MAXVALUE"): 3742 this.set("maxvalue", self._parse_bitwise()) 3743 3744 if self._match_text_seq("CYCLE"): 3745 this.set("cycle", True) 3746 elif self._match_text_seq("NO", "CYCLE"): 3747 this.set("cycle", False) 3748 3749 if not identity: 3750 this.set("expression", self._parse_bitwise()) 3751 3752 self._match_r_paren() 3753 3754 return this 3755 3756 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3757 self._match_text_seq("LENGTH") 3758 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3759 3760 def _parse_not_constraint( 3761 self, 3762 ) -> t.Optional[exp.Expression]: 3763 if self._match_text_seq("NULL"): 3764 return self.expression(exp.NotNullColumnConstraint) 3765 if self._match_text_seq("CASESPECIFIC"): 3766 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3767 if self._match_text_seq("FOR", "REPLICATION"): 3768 return self.expression(exp.NotForReplicationColumnConstraint) 3769 return None 3770 3771 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3772 if self._match(TokenType.CONSTRAINT): 3773 this = self._parse_id_var() 3774 else: 3775 this = None 3776 3777 if self._match_texts(self.CONSTRAINT_PARSERS): 3778 return self.expression( 3779 exp.ColumnConstraint, 3780 this=this, 3781 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3782 ) 3783 3784 return this 3785 3786 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3787 if not self._match(TokenType.CONSTRAINT): 3788 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3789 3790 this = self._parse_id_var() 3791 expressions = [] 3792 3793 while True: 3794 constraint = self._parse_unnamed_constraint() or self._parse_function() 3795 if not constraint: 3796 break 3797 expressions.append(constraint) 3798 3799 return self.expression(exp.Constraint, this=this, expressions=expressions) 3800 3801 def _parse_unnamed_constraint( 3802 self, constraints: t.Optional[t.Collection[str]] = None 3803 ) -> t.Optional[exp.Expression]: 3804 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3805 return None 3806 3807 constraint = self._prev.text.upper() 3808 if constraint not in self.CONSTRAINT_PARSERS: 3809 self.raise_error(f"No parser found for schema constraint {constraint}.") 3810 3811 return self.CONSTRAINT_PARSERS[constraint](self) 3812 3813 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3814 self._match_text_seq("KEY") 3815 return self.expression( 3816 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3817 ) 3818 3819 def _parse_key_constraint_options(self) -> t.List[str]: 3820 options = [] 3821 while True: 3822 if not self._curr: 3823 break 
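            # Each pass through the loop consumes one option and stores it as a plain
            # string: ON <event> <action> (e.g. ON DELETE CASCADE), NOT ENFORCED,
            # DEFERRABLE, INITIALLY DEFERRED, NORELY or MATCH FULL. Any other token
            # ends the loop.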
3824 3825 if self._match(TokenType.ON): 3826 action = None 3827 on = self._advance_any() and self._prev.text 3828 3829 if self._match_text_seq("NO", "ACTION"): 3830 action = "NO ACTION" 3831 elif self._match_text_seq("CASCADE"): 3832 action = "CASCADE" 3833 elif self._match_pair(TokenType.SET, TokenType.NULL): 3834 action = "SET NULL" 3835 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3836 action = "SET DEFAULT" 3837 else: 3838 self.raise_error("Invalid key constraint") 3839 3840 options.append(f"ON {on} {action}") 3841 elif self._match_text_seq("NOT", "ENFORCED"): 3842 options.append("NOT ENFORCED") 3843 elif self._match_text_seq("DEFERRABLE"): 3844 options.append("DEFERRABLE") 3845 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3846 options.append("INITIALLY DEFERRED") 3847 elif self._match_text_seq("NORELY"): 3848 options.append("NORELY") 3849 elif self._match_text_seq("MATCH", "FULL"): 3850 options.append("MATCH FULL") 3851 else: 3852 break 3853 3854 return options 3855 3856 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3857 if match and not self._match(TokenType.REFERENCES): 3858 return None 3859 3860 expressions = None 3861 this = self._parse_table(schema=True) 3862 options = self._parse_key_constraint_options() 3863 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3864 3865 def _parse_foreign_key(self) -> exp.ForeignKey: 3866 expressions = self._parse_wrapped_id_vars() 3867 reference = self._parse_references() 3868 options = {} 3869 3870 while self._match(TokenType.ON): 3871 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3872 self.raise_error("Expected DELETE or UPDATE") 3873 3874 kind = self._prev.text.lower() 3875 3876 if self._match_text_seq("NO", "ACTION"): 3877 action = "NO ACTION" 3878 elif self._match(TokenType.SET): 3879 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3880 action = "SET " + self._prev.text.upper() 3881 else: 3882 self._advance() 3883 action = self._prev.text.upper() 3884 3885 options[kind] = action 3886 3887 return self.expression( 3888 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3889 ) 3890 3891 def _parse_primary_key( 3892 self, wrapped_optional: bool = False, in_props: bool = False 3893 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3894 desc = ( 3895 self._match_set((TokenType.ASC, TokenType.DESC)) 3896 and self._prev.token_type == TokenType.DESC 3897 ) 3898 3899 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3900 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3901 3902 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3903 options = self._parse_key_constraint_options() 3904 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3905 3906 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3907 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3908 return this 3909 3910 bracket_kind = self._prev.token_type 3911 3912 if self._match(TokenType.COLON): 3913 expressions: t.List[exp.Expression] = [ 3914 self.expression(exp.Slice, expression=self._parse_conjunction()) 3915 ] 3916 else: 3917 expressions = self._parse_csv( 3918 lambda: self._parse_slice( 3919 self._parse_alias(self._parse_conjunction(), explicit=True) 3920 ) 3921 ) 3922 3923 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3924 if bracket_kind == TokenType.L_BRACE: 
3925 this = self.expression(exp.Struct, expressions=expressions) 3926 elif not this or this.name.upper() == "ARRAY": 3927 this = self.expression(exp.Array, expressions=expressions) 3928 else: 3929 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3930 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3931 3932 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3933 self.raise_error("Expected ]") 3934 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3935 self.raise_error("Expected }") 3936 3937 self._add_comments(this) 3938 return self._parse_bracket(this) 3939 3940 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3941 if self._match(TokenType.COLON): 3942 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3943 return this 3944 3945 def _parse_case(self) -> t.Optional[exp.Expression]: 3946 ifs = [] 3947 default = None 3948 3949 comments = self._prev_comments 3950 expression = self._parse_conjunction() 3951 3952 while self._match(TokenType.WHEN): 3953 this = self._parse_conjunction() 3954 self._match(TokenType.THEN) 3955 then = self._parse_conjunction() 3956 ifs.append(self.expression(exp.If, this=this, true=then)) 3957 3958 if self._match(TokenType.ELSE): 3959 default = self._parse_conjunction() 3960 3961 if not self._match(TokenType.END): 3962 self.raise_error("Expected END after CASE", self._prev) 3963 3964 return self._parse_window( 3965 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 3966 ) 3967 3968 def _parse_if(self) -> t.Optional[exp.Expression]: 3969 if self._match(TokenType.L_PAREN): 3970 args = self._parse_csv(self._parse_conjunction) 3971 this = self.validate_expression(exp.If.from_arg_list(args), args) 3972 self._match_r_paren() 3973 else: 3974 index = self._index - 1 3975 condition = self._parse_conjunction() 3976 3977 if not condition: 3978 self._retreat(index) 3979 return None 3980 3981 self._match(TokenType.THEN) 3982 true = self._parse_conjunction() 3983 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3984 self._match(TokenType.END) 3985 this = self.expression(exp.If, this=condition, true=true, false=false) 3986 3987 return self._parse_window(this) 3988 3989 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 3990 if not self._match_text_seq("VALUE", "FOR"): 3991 self._retreat(self._index - 1) 3992 return None 3993 3994 return self.expression( 3995 exp.NextValueFor, 3996 this=self._parse_column(), 3997 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 3998 ) 3999 4000 def _parse_extract(self) -> exp.Extract: 4001 this = self._parse_function() or self._parse_var() or self._parse_type() 4002 4003 if self._match(TokenType.FROM): 4004 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4005 4006 if not self._match(TokenType.COMMA): 4007 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4008 4009 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4010 4011 def _parse_any_value(self) -> exp.AnyValue: 4012 this = self._parse_lambda() 4013 is_max = None 4014 having = None 4015 4016 if self._match(TokenType.HAVING): 4017 self._match_texts(("MAX", "MIN")) 4018 is_max = self._prev.text == "MAX" 4019 having = self._parse_column() 4020 4021 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4022 4023 def 
_parse_cast(self, strict: bool) -> exp.Expression: 4024 this = self._parse_conjunction() 4025 4026 if not self._match(TokenType.ALIAS): 4027 if self._match(TokenType.COMMA): 4028 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4029 4030 self.raise_error("Expected AS after CAST") 4031 4032 fmt = None 4033 to = self._parse_types() 4034 4035 if not to: 4036 self.raise_error("Expected TYPE after CAST") 4037 elif isinstance(to, exp.Identifier): 4038 to = exp.DataType.build(to.name, udt=True) 4039 elif to.this == exp.DataType.Type.CHAR: 4040 if self._match(TokenType.CHARACTER_SET): 4041 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4042 elif self._match(TokenType.FORMAT): 4043 fmt_string = self._parse_string() 4044 fmt = self._parse_at_time_zone(fmt_string) 4045 4046 if to.this in exp.DataType.TEMPORAL_TYPES: 4047 this = self.expression( 4048 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4049 this=this, 4050 format=exp.Literal.string( 4051 format_time( 4052 fmt_string.this if fmt_string else "", 4053 self.FORMAT_MAPPING or self.TIME_MAPPING, 4054 self.FORMAT_TRIE or self.TIME_TRIE, 4055 ) 4056 ), 4057 ) 4058 4059 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4060 this.set("zone", fmt.args["zone"]) 4061 4062 return this 4063 4064 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 4065 4066 def _parse_concat(self) -> t.Optional[exp.Expression]: 4067 args = self._parse_csv(self._parse_conjunction) 4068 if self.CONCAT_NULL_OUTPUTS_STRING: 4069 args = [ 4070 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 4071 for arg in args 4072 if arg 4073 ] 4074 4075 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4076 # we find such a call we replace it with its argument. 4077 if len(args) == 1: 4078 return args[0] 4079 4080 return self.expression( 4081 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4082 ) 4083 4084 def _parse_string_agg(self) -> exp.Expression: 4085 if self._match(TokenType.DISTINCT): 4086 args: t.List[t.Optional[exp.Expression]] = [ 4087 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4088 ] 4089 if self._match(TokenType.COMMA): 4090 args.extend(self._parse_csv(self._parse_conjunction)) 4091 else: 4092 args = self._parse_csv(self._parse_conjunction) # type: ignore 4093 4094 index = self._index 4095 if not self._match(TokenType.R_PAREN) and args: 4096 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4097 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4098 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4099 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4100 4101 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4102 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4103 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
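        # For example, STRING_AGG(x, ',') WITHIN GROUP (ORDER BY y) is parsed below
        # into GroupConcat(this=Order(this=x, ...), separator=','), the same shape
        # MySQL's GROUP_CONCAT(x ORDER BY y SEPARATOR ',') produces.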
4104 if not self._match_text_seq("WITHIN", "GROUP"): 4105 self._retreat(index) 4106 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4107 4108 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4109 order = self._parse_order(this=seq_get(args, 0)) 4110 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4111 4112 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4113 this = self._parse_bitwise() 4114 4115 if self._match(TokenType.USING): 4116 to: t.Optional[exp.Expression] = self.expression( 4117 exp.CharacterSet, this=self._parse_var() 4118 ) 4119 elif self._match(TokenType.COMMA): 4120 to = self._parse_types() 4121 else: 4122 to = None 4123 4124 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4125 4126 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4127 """ 4128 There are generally two variants of the DECODE function: 4129 4130 - DECODE(bin, charset) 4131 - DECODE(expression, search, result [, search, result] ... [, default]) 4132 4133 The second variant will always be parsed into a CASE expression. Note that NULL 4134 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4135 instead of relying on pattern matching. 4136 """ 4137 args = self._parse_csv(self._parse_conjunction) 4138 4139 if len(args) < 3: 4140 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4141 4142 expression, *expressions = args 4143 if not expression: 4144 return None 4145 4146 ifs = [] 4147 for search, result in zip(expressions[::2], expressions[1::2]): 4148 if not search or not result: 4149 return None 4150 4151 if isinstance(search, exp.Literal): 4152 ifs.append( 4153 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4154 ) 4155 elif isinstance(search, exp.Null): 4156 ifs.append( 4157 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4158 ) 4159 else: 4160 cond = exp.or_( 4161 exp.EQ(this=expression.copy(), expression=search), 4162 exp.and_( 4163 exp.Is(this=expression.copy(), expression=exp.Null()), 4164 exp.Is(this=search.copy(), expression=exp.Null()), 4165 copy=False, 4166 ), 4167 copy=False, 4168 ) 4169 ifs.append(exp.If(this=cond, true=result)) 4170 4171 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4172 4173 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4174 self._match_text_seq("KEY") 4175 key = self._parse_column() 4176 self._match_set((TokenType.COLON, TokenType.COMMA)) 4177 self._match_text_seq("VALUE") 4178 value = self._parse_bitwise() 4179 4180 if not key and not value: 4181 return None 4182 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4183 4184 def _parse_json_object(self) -> exp.JSONObject: 4185 star = self._parse_star() 4186 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 4187 4188 null_handling = None 4189 if self._match_text_seq("NULL", "ON", "NULL"): 4190 null_handling = "NULL ON NULL" 4191 elif self._match_text_seq("ABSENT", "ON", "NULL"): 4192 null_handling = "ABSENT ON NULL" 4193 4194 unique_keys = None 4195 if self._match_text_seq("WITH", "UNIQUE"): 4196 unique_keys = True 4197 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4198 unique_keys = False 4199 4200 self._match_text_seq("KEYS") 4201 4202 return_type = self._match_text_seq("RETURNING") and self._parse_type() 4203 format_json = 
self._match_text_seq("FORMAT", "JSON") 4204 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4205 4206 return self.expression( 4207 exp.JSONObject, 4208 expressions=expressions, 4209 null_handling=null_handling, 4210 unique_keys=unique_keys, 4211 return_type=return_type, 4212 format_json=format_json, 4213 encoding=encoding, 4214 ) 4215 4216 def _parse_logarithm(self) -> exp.Func: 4217 # Default argument order is base, expression 4218 args = self._parse_csv(self._parse_range) 4219 4220 if len(args) > 1: 4221 if not self.LOG_BASE_FIRST: 4222 args.reverse() 4223 return exp.Log.from_arg_list(args) 4224 4225 return self.expression( 4226 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4227 ) 4228 4229 def _parse_match_against(self) -> exp.MatchAgainst: 4230 expressions = self._parse_csv(self._parse_column) 4231 4232 self._match_text_seq(")", "AGAINST", "(") 4233 4234 this = self._parse_string() 4235 4236 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4237 modifier = "IN NATURAL LANGUAGE MODE" 4238 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4239 modifier = f"{modifier} WITH QUERY EXPANSION" 4240 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4241 modifier = "IN BOOLEAN MODE" 4242 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4243 modifier = "WITH QUERY EXPANSION" 4244 else: 4245 modifier = None 4246 4247 return self.expression( 4248 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4249 ) 4250 4251 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4252 def _parse_open_json(self) -> exp.OpenJSON: 4253 this = self._parse_bitwise() 4254 path = self._match(TokenType.COMMA) and self._parse_string() 4255 4256 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4257 this = self._parse_field(any_token=True) 4258 kind = self._parse_types() 4259 path = self._parse_string() 4260 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4261 4262 return self.expression( 4263 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4264 ) 4265 4266 expressions = None 4267 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4268 self._match_l_paren() 4269 expressions = self._parse_csv(_parse_open_json_column_def) 4270 4271 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4272 4273 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4274 args = self._parse_csv(self._parse_bitwise) 4275 4276 if self._match(TokenType.IN): 4277 return self.expression( 4278 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4279 ) 4280 4281 if haystack_first: 4282 haystack = seq_get(args, 0) 4283 needle = seq_get(args, 1) 4284 else: 4285 needle = seq_get(args, 0) 4286 haystack = seq_get(args, 1) 4287 4288 return self.expression( 4289 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4290 ) 4291 4292 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4293 args = self._parse_csv(self._parse_table) 4294 return exp.JoinHint(this=func_name.upper(), expressions=args) 4295 4296 def _parse_substring(self) -> exp.Substring: 4297 # Postgres supports the form: substring(string [from int] [for int]) 4298 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4299 4300 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4301 4302 if self._match(TokenType.FROM): 4303 args.append(self._parse_bitwise()) 4304 
if self._match(TokenType.FOR): 4305 args.append(self._parse_bitwise()) 4306 4307 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4308 4309 def _parse_trim(self) -> exp.Trim: 4310 # https://www.w3resource.com/sql/character-functions/trim.php 4311 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4312 4313 position = None 4314 collation = None 4315 4316 if self._match_texts(self.TRIM_TYPES): 4317 position = self._prev.text.upper() 4318 4319 expression = self._parse_bitwise() 4320 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4321 this = self._parse_bitwise() 4322 else: 4323 this = expression 4324 expression = None 4325 4326 if self._match(TokenType.COLLATE): 4327 collation = self._parse_bitwise() 4328 4329 return self.expression( 4330 exp.Trim, this=this, position=position, expression=expression, collation=collation 4331 ) 4332 4333 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4334 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4335 4336 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4337 return self._parse_window(self._parse_id_var(), alias=True) 4338 4339 def _parse_respect_or_ignore_nulls( 4340 self, this: t.Optional[exp.Expression] 4341 ) -> t.Optional[exp.Expression]: 4342 if self._match_text_seq("IGNORE", "NULLS"): 4343 return self.expression(exp.IgnoreNulls, this=this) 4344 if self._match_text_seq("RESPECT", "NULLS"): 4345 return self.expression(exp.RespectNulls, this=this) 4346 return this 4347 4348 def _parse_window( 4349 self, this: t.Optional[exp.Expression], alias: bool = False 4350 ) -> t.Optional[exp.Expression]: 4351 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4352 self._match(TokenType.WHERE) 4353 this = self.expression( 4354 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4355 ) 4356 self._match_r_paren() 4357 4358 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4359 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4360 if self._match_text_seq("WITHIN", "GROUP"): 4361 order = self._parse_wrapped(self._parse_order) 4362 this = self.expression(exp.WithinGroup, this=this, expression=order) 4363 4364 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4365 # Some dialects choose to implement it and some do not. 4366 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4367 4368 # There is some code above in _parse_lambda that handles 4369 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4370 4371 # The code below handles 4372 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4373 4374 # Oracle allows both formats 4375 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4376 # and Snowflake chose to do the same for familiarity 4377 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4378 this = self._parse_respect_or_ignore_nulls(this) 4379 4380 # bigquery select from window x AS (partition by ...)
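        # With alias=True we are parsing a named window from a WINDOW clause, e.g.
        # SELECT ... FROM t WINDOW w AS (PARTITION BY c). That form has no OVER
        # keyword, so `over` stays None; otherwise the window must be introduced by
        # a token from WINDOW_BEFORE_PAREN_TOKENS (typically OVER).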
4381 if alias: 4382 over = None 4383 self._match(TokenType.ALIAS) 4384 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4385 return this 4386 else: 4387 over = self._prev.text.upper() 4388 4389 if not self._match(TokenType.L_PAREN): 4390 return self.expression( 4391 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4392 ) 4393 4394 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4395 4396 first = self._match(TokenType.FIRST) 4397 if self._match_text_seq("LAST"): 4398 first = False 4399 4400 partition, order = self._parse_partition_and_order() 4401 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4402 4403 if kind: 4404 self._match(TokenType.BETWEEN) 4405 start = self._parse_window_spec() 4406 self._match(TokenType.AND) 4407 end = self._parse_window_spec() 4408 4409 spec = self.expression( 4410 exp.WindowSpec, 4411 kind=kind, 4412 start=start["value"], 4413 start_side=start["side"], 4414 end=end["value"], 4415 end_side=end["side"], 4416 ) 4417 else: 4418 spec = None 4419 4420 self._match_r_paren() 4421 4422 window = self.expression( 4423 exp.Window, 4424 this=this, 4425 partition_by=partition, 4426 order=order, 4427 spec=spec, 4428 alias=window_alias, 4429 over=over, 4430 first=first, 4431 ) 4432 4433 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4434 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4435 return self._parse_window(window, alias=alias) 4436 4437 return window 4438 4439 def _parse_partition_and_order( 4440 self, 4441 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4442 return self._parse_partition_by(), self._parse_order() 4443 4444 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4445 self._match(TokenType.BETWEEN) 4446 4447 return { 4448 "value": ( 4449 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4450 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4451 or self._parse_bitwise() 4452 ), 4453 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4454 } 4455 4456 def _parse_alias( 4457 self, this: t.Optional[exp.Expression], explicit: bool = False 4458 ) -> t.Optional[exp.Expression]: 4459 any_token = self._match(TokenType.ALIAS) 4460 4461 if explicit and not any_token: 4462 return this 4463 4464 if self._match(TokenType.L_PAREN): 4465 aliases = self.expression( 4466 exp.Aliases, 4467 this=this, 4468 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4469 ) 4470 self._match_r_paren(aliases) 4471 return aliases 4472 4473 alias = self._parse_id_var(any_token) 4474 4475 if alias: 4476 return self.expression(exp.Alias, this=this, alias=alias) 4477 4478 return this 4479 4480 def _parse_id_var( 4481 self, 4482 any_token: bool = True, 4483 tokens: t.Optional[t.Collection[TokenType]] = None, 4484 ) -> t.Optional[exp.Expression]: 4485 identifier = self._parse_identifier() 4486 4487 if identifier: 4488 return identifier 4489 4490 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4491 quoted = self._prev.token_type == TokenType.STRING 4492 return exp.Identifier(this=self._prev.text, quoted=quoted) 4493 4494 return None 4495 4496 def _parse_string(self) -> t.Optional[exp.Expression]: 4497 if self._match(TokenType.STRING): 4498 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4499 return self._parse_placeholder() 4500 4501 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4502 return 
exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4503 4504 def _parse_number(self) -> t.Optional[exp.Expression]: 4505 if self._match(TokenType.NUMBER): 4506 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4507 return self._parse_placeholder() 4508 4509 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4510 if self._match(TokenType.IDENTIFIER): 4511 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4512 return self._parse_placeholder() 4513 4514 def _parse_var( 4515 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4516 ) -> t.Optional[exp.Expression]: 4517 if ( 4518 (any_token and self._advance_any()) 4519 or self._match(TokenType.VAR) 4520 or (self._match_set(tokens) if tokens else False) 4521 ): 4522 return self.expression(exp.Var, this=self._prev.text) 4523 return self._parse_placeholder() 4524 4525 def _advance_any(self) -> t.Optional[Token]: 4526 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4527 self._advance() 4528 return self._prev 4529 return None 4530 4531 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4532 return self._parse_var() or self._parse_string() 4533 4534 def _parse_null(self) -> t.Optional[exp.Expression]: 4535 if self._match(TokenType.NULL): 4536 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4537 return self._parse_placeholder() 4538 4539 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4540 if self._match(TokenType.TRUE): 4541 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4542 if self._match(TokenType.FALSE): 4543 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4544 return self._parse_placeholder() 4545 4546 def _parse_star(self) -> t.Optional[exp.Expression]: 4547 if self._match(TokenType.STAR): 4548 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4549 return self._parse_placeholder() 4550 4551 def _parse_parameter(self) -> exp.Parameter: 4552 wrapped = self._match(TokenType.L_BRACE) 4553 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4554 self._match(TokenType.R_BRACE) 4555 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4556 4557 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4558 if self._match_set(self.PLACEHOLDER_PARSERS): 4559 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4560 if placeholder: 4561 return placeholder 4562 self._advance(-1) 4563 return None 4564 4565 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4566 if not self._match(TokenType.EXCEPT): 4567 return None 4568 if self._match(TokenType.L_PAREN, advance=False): 4569 return self._parse_wrapped_csv(self._parse_column) 4570 return self._parse_csv(self._parse_column) 4571 4572 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4573 if not self._match(TokenType.REPLACE): 4574 return None 4575 if self._match(TokenType.L_PAREN, advance=False): 4576 return self._parse_wrapped_csv(self._parse_expression) 4577 return self._parse_expressions() 4578 4579 def _parse_csv( 4580 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4581 ) -> t.List[exp.Expression]: 4582 parse_result = parse_method() 4583 items = [parse_result] if parse_result is not None else [] 4584 4585 while self._match(sep): 4586 self._add_comments(parse_result) 4587 parse_result = parse_method() 4588 if parse_result is not None: 4589 items.append(parse_result) 4590 4591 return items 4592 4593 def 
_parse_tokens( 4594 self, parse_method: t.Callable, expressions: t.Dict 4595 ) -> t.Optional[exp.Expression]: 4596 this = parse_method() 4597 4598 while self._match_set(expressions): 4599 this = self.expression( 4600 expressions[self._prev.token_type], 4601 this=this, 4602 comments=self._prev_comments, 4603 expression=parse_method(), 4604 ) 4605 4606 return this 4607 4608 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4609 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4610 4611 def _parse_wrapped_csv( 4612 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4613 ) -> t.List[exp.Expression]: 4614 return self._parse_wrapped( 4615 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4616 ) 4617 4618 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4619 wrapped = self._match(TokenType.L_PAREN) 4620 if not wrapped and not optional: 4621 self.raise_error("Expecting (") 4622 parse_result = parse_method() 4623 if wrapped: 4624 self._match_r_paren() 4625 return parse_result 4626 4627 def _parse_expressions(self) -> t.List[exp.Expression]: 4628 return self._parse_csv(self._parse_expression) 4629 4630 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4631 return self._parse_select() or self._parse_set_operations( 4632 self._parse_expression() if alias else self._parse_conjunction() 4633 ) 4634 4635 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4636 return self._parse_query_modifiers( 4637 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4638 ) 4639 4640 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4641 this = None 4642 if self._match_texts(self.TRANSACTION_KIND): 4643 this = self._prev.text 4644 4645 self._match_texts({"TRANSACTION", "WORK"}) 4646 4647 modes = [] 4648 while True: 4649 mode = [] 4650 while self._match(TokenType.VAR): 4651 mode.append(self._prev.text) 4652 4653 if mode: 4654 modes.append(" ".join(mode)) 4655 if not self._match(TokenType.COMMA): 4656 break 4657 4658 return self.expression(exp.Transaction, this=this, modes=modes) 4659 4660 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4661 chain = None 4662 savepoint = None 4663 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4664 4665 self._match_texts({"TRANSACTION", "WORK"}) 4666 4667 if self._match_text_seq("TO"): 4668 self._match_text_seq("SAVEPOINT") 4669 savepoint = self._parse_id_var() 4670 4671 if self._match(TokenType.AND): 4672 chain = not self._match_text_seq("NO") 4673 self._match_text_seq("CHAIN") 4674 4675 if is_rollback: 4676 return self.expression(exp.Rollback, savepoint=savepoint) 4677 4678 return self.expression(exp.Commit, chain=chain) 4679 4680 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4681 if not self._match_text_seq("ADD"): 4682 return None 4683 4684 self._match(TokenType.COLUMN) 4685 exists_column = self._parse_exists(not_=True) 4686 expression = self._parse_field_def() 4687 4688 if expression: 4689 expression.set("exists", exists_column) 4690 4691 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4692 if self._match_texts(("FIRST", "AFTER")): 4693 position = self._prev.text 4694 column_position = self.expression( 4695 exp.ColumnPosition, this=self._parse_column(), position=position 4696 ) 4697 expression.set("position", column_position) 4698 4699 return expression 4700 
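    # Illustrative usage, not part of the parser: _parse_add_column above backs the
    # ALTER TABLE ... ADD COLUMN path, including the Databricks/MySQL-style
    # FIRST / AFTER position clause, so a round trip looks roughly like:
    #
    #     import sqlglot
    #     sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT AFTER b").sql()
    #     # 'ALTER TABLE t ADD COLUMN c INT AFTER b'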
4701 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4702 drop = self._match(TokenType.DROP) and self._parse_drop() 4703 if drop and not isinstance(drop, exp.Command): 4704 drop.set("kind", drop.args.get("kind", "COLUMN")) 4705 return drop 4706 4707 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4708 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4709 return self.expression( 4710 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4711 ) 4712 4713 def _parse_add_constraint(self) -> exp.AddConstraint: 4714 this = None 4715 kind = self._prev.token_type 4716 4717 if kind == TokenType.CONSTRAINT: 4718 this = self._parse_id_var() 4719 4720 if self._match_text_seq("CHECK"): 4721 expression = self._parse_wrapped(self._parse_conjunction) 4722 enforced = self._match_text_seq("ENFORCED") 4723 4724 return self.expression( 4725 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4726 ) 4727 4728 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4729 expression = self._parse_foreign_key() 4730 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4731 expression = self._parse_primary_key() 4732 else: 4733 expression = None 4734 4735 return self.expression(exp.AddConstraint, this=this, expression=expression) 4736 4737 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4738 index = self._index - 1 4739 4740 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4741 return self._parse_csv(self._parse_add_constraint) 4742 4743 self._retreat(index) 4744 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4745 return self._parse_csv(self._parse_field_def) 4746 4747 return self._parse_csv(self._parse_add_column) 4748 4749 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4750 self._match(TokenType.COLUMN) 4751 column = self._parse_field(any_token=True) 4752 4753 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4754 return self.expression(exp.AlterColumn, this=column, drop=True) 4755 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4756 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4757 4758 self._match_text_seq("SET", "DATA") 4759 return self.expression( 4760 exp.AlterColumn, 4761 this=column, 4762 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4763 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4764 using=self._match(TokenType.USING) and self._parse_conjunction(), 4765 ) 4766 4767 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4768 index = self._index - 1 4769 4770 partition_exists = self._parse_exists() 4771 if self._match(TokenType.PARTITION, advance=False): 4772 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4773 4774 self._retreat(index) 4775 return self._parse_csv(self._parse_drop_column) 4776 4777 def _parse_alter_table_rename(self) -> exp.RenameTable: 4778 self._match_text_seq("TO") 4779 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4780 4781 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4782 start = self._prev 4783 4784 if not self._match(TokenType.TABLE): 4785 return self._parse_as_command(start) 4786 4787 exists = self._parse_exists() 4788 only = self._match_text_seq("ONLY") 4789 this = self._parse_table(schema=True) 4790 4791 if self._next: 4792 self._advance() 4793 4794 parser = 
self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4795 if parser: 4796 actions = ensure_list(parser(self)) 4797 4798 if not self._curr: 4799 return self.expression( 4800 exp.AlterTable, 4801 this=this, 4802 exists=exists, 4803 actions=actions, 4804 only=only, 4805 ) 4806 4807 return self._parse_as_command(start) 4808 4809 def _parse_merge(self) -> exp.Merge: 4810 self._match(TokenType.INTO) 4811 target = self._parse_table() 4812 4813 if target and self._match(TokenType.ALIAS, advance=False): 4814 target.set("alias", self._parse_table_alias()) 4815 4816 self._match(TokenType.USING) 4817 using = self._parse_table() 4818 4819 self._match(TokenType.ON) 4820 on = self._parse_conjunction() 4821 4822 whens = [] 4823 while self._match(TokenType.WHEN): 4824 matched = not self._match(TokenType.NOT) 4825 self._match_text_seq("MATCHED") 4826 source = ( 4827 False 4828 if self._match_text_seq("BY", "TARGET") 4829 else self._match_text_seq("BY", "SOURCE") 4830 ) 4831 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4832 4833 self._match(TokenType.THEN) 4834 4835 if self._match(TokenType.INSERT): 4836 _this = self._parse_star() 4837 if _this: 4838 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4839 else: 4840 then = self.expression( 4841 exp.Insert, 4842 this=self._parse_value(), 4843 expression=self._match(TokenType.VALUES) and self._parse_value(), 4844 ) 4845 elif self._match(TokenType.UPDATE): 4846 expressions = self._parse_star() 4847 if expressions: 4848 then = self.expression(exp.Update, expressions=expressions) 4849 else: 4850 then = self.expression( 4851 exp.Update, 4852 expressions=self._match(TokenType.SET) 4853 and self._parse_csv(self._parse_equality), 4854 ) 4855 elif self._match(TokenType.DELETE): 4856 then = self.expression(exp.Var, this=self._prev.text) 4857 else: 4858 then = None 4859 4860 whens.append( 4861 self.expression( 4862 exp.When, 4863 matched=matched, 4864 source=source, 4865 condition=condition, 4866 then=then, 4867 ) 4868 ) 4869 4870 return self.expression( 4871 exp.Merge, 4872 this=target, 4873 using=using, 4874 on=on, 4875 expressions=whens, 4876 ) 4877 4878 def _parse_show(self) -> t.Optional[exp.Expression]: 4879 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4880 if parser: 4881 return parser(self) 4882 return self._parse_as_command(self._prev) 4883 4884 def _parse_set_item_assignment( 4885 self, kind: t.Optional[str] = None 4886 ) -> t.Optional[exp.Expression]: 4887 index = self._index 4888 4889 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4890 return self._parse_set_transaction(global_=kind == "GLOBAL") 4891 4892 left = self._parse_primary() or self._parse_id_var() 4893 4894 if not self._match_texts(("=", "TO")): 4895 self._retreat(index) 4896 return None 4897 4898 right = self._parse_statement() or self._parse_id_var() 4899 this = self.expression(exp.EQ, this=left, expression=right) 4900 4901 return self.expression(exp.SetItem, this=this, kind=kind) 4902 4903 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4904 self._match_text_seq("TRANSACTION") 4905 characteristics = self._parse_csv( 4906 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4907 ) 4908 return self.expression( 4909 exp.SetItem, 4910 expressions=characteristics, 4911 kind="TRANSACTION", 4912 **{"global": global_}, # type: ignore 4913 ) 4914 4915 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4916 parser = 
self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4917 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4918 4919 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4920 index = self._index 4921 set_ = self.expression( 4922 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4923 ) 4924 4925 if self._curr: 4926 self._retreat(index) 4927 return self._parse_as_command(self._prev) 4928 4929 return set_ 4930 4931 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4932 for option in options: 4933 if self._match_text_seq(*option.split(" ")): 4934 return exp.var(option) 4935 return None 4936 4937 def _parse_as_command(self, start: Token) -> exp.Command: 4938 while self._curr: 4939 self._advance() 4940 text = self._find_sql(start, self._prev) 4941 size = len(start.text) 4942 return exp.Command(this=text[:size], expression=text[size:]) 4943 4944 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4945 settings = [] 4946 4947 self._match_l_paren() 4948 kind = self._parse_id_var() 4949 4950 if self._match(TokenType.L_PAREN): 4951 while True: 4952 key = self._parse_id_var() 4953 value = self._parse_primary() 4954 4955 if not key and value is None: 4956 break 4957 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4958 self._match(TokenType.R_PAREN) 4959 4960 self._match_r_paren() 4961 4962 return self.expression( 4963 exp.DictProperty, 4964 this=this, 4965 kind=kind.this if kind else None, 4966 settings=settings, 4967 ) 4968 4969 def _parse_dict_range(self, this: str) -> exp.DictRange: 4970 self._match_l_paren() 4971 has_min = self._match_text_seq("MIN") 4972 if has_min: 4973 min = self._parse_var() or self._parse_primary() 4974 self._match_text_seq("MAX") 4975 max = self._parse_var() or self._parse_primary() 4976 else: 4977 max = self._parse_var() or self._parse_primary() 4978 min = exp.Literal.number(0) 4979 self._match_r_paren() 4980 return self.expression(exp.DictRange, this=this, min=min, max=max) 4981 4982 def _parse_comprehension(self, this: exp.Expression) -> exp.Comprehension: 4983 expression = self._parse_column() 4984 self._match(TokenType.IN) 4985 iterator = self._parse_column() 4986 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 4987 return self.expression( 4988 exp.Comprehension, 4989 this=this, 4990 expression=expression, 4991 iterator=iterator, 4992 condition=condition, 4993 ) 4994 4995 def _find_parser( 4996 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4997 ) -> t.Optional[t.Callable]: 4998 if not self._curr: 4999 return None 5000 5001 index = self._index 5002 this = [] 5003 while True: 5004 # The current token might be multiple words 5005 curr = self._curr.text.upper() 5006 key = curr.split(" ") 5007 this.append(curr) 5008 5009 self._advance() 5010 result, trie = in_trie(trie, key) 5011 if result == TrieResult.FAILED: 5012 break 5013 5014 if result == TrieResult.EXISTS: 5015 subparser = parsers[" ".join(this)] 5016 return subparser 5017 5018 self._retreat(index) 5019 return None 5020 5021 def _match(self, token_type, advance=True, expression=None): 5022 if not self._curr: 5023 return None 5024 5025 if self._curr.token_type == token_type: 5026 if advance: 5027 self._advance() 5028 self._add_comments(expression) 5029 return True 5030 5031 return None 5032 5033 def _match_set(self, types, advance=True): 5034 if not self._curr: 5035 return None 5036 5037 if self._curr.token_type in 
types: 5038 if advance: 5039 self._advance() 5040 return True 5041 5042 return None 5043 5044 def _match_pair(self, token_type_a, token_type_b, advance=True): 5045 if not self._curr or not self._next: 5046 return None 5047 5048 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5049 if advance: 5050 self._advance(2) 5051 return True 5052 5053 return None 5054 5055 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5056 if not self._match(TokenType.L_PAREN, expression=expression): 5057 self.raise_error("Expecting (") 5058 5059 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5060 if not self._match(TokenType.R_PAREN, expression=expression): 5061 self.raise_error("Expecting )") 5062 5063 def _match_texts(self, texts, advance=True): 5064 if self._curr and self._curr.text.upper() in texts: 5065 if advance: 5066 self._advance() 5067 return True 5068 return False 5069 5070 def _match_text_seq(self, *texts, advance=True): 5071 index = self._index 5072 for text in texts: 5073 if self._curr and self._curr.text.upper() == text: 5074 self._advance() 5075 else: 5076 self._retreat(index) 5077 return False 5078 5079 if not advance: 5080 self._retreat(index) 5081 5082 return True 5083 5084 @t.overload 5085 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5086 ... 5087 5088 @t.overload 5089 def _replace_columns_with_dots( 5090 self, this: t.Optional[exp.Expression] 5091 ) -> t.Optional[exp.Expression]: 5092 ... 5093 5094 def _replace_columns_with_dots(self, this): 5095 if isinstance(this, exp.Dot): 5096 exp.replace_children(this, self._replace_columns_with_dots) 5097 elif isinstance(this, exp.Column): 5098 exp.replace_children(this, self._replace_columns_with_dots) 5099 table = this.args.get("table") 5100 this = ( 5101 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5102 ) 5103 5104 return this 5105 5106 def _replace_lambda( 5107 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5108 ) -> t.Optional[exp.Expression]: 5109 if not node: 5110 return node 5111 5112 for column in node.find_all(exp.Column): 5113 if column.parts[0].name in lambda_variables: 5114 dot_or_id = column.to_dot() if column.table else column.this 5115 parent = column.parent 5116 5117 while isinstance(parent, exp.Dot): 5118 if not isinstance(parent.parent, exp.Dot): 5119 parent.replace(dot_or_id) 5120 break 5121 parent = parent.parent 5122 else: 5123 if column is node: 5124 node = dot_or_id 5125 else: 5126 column.replace(dot_or_id) 5127 return node
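A minimal end-to-end sketch of the machinery above (illustrative usage of the public sqlglot API only; the exact tree shapes follow the _parse_* helpers in this module):

import sqlglot
from sqlglot import exp

# parse_one tokenizes and parses a single statement into an expression tree.
# The :: operator, for instance, is handled by COLUMN_OPERATORS via
# _parse_column_ops and becomes an exp.Cast node under the default
# (strict-cast) settings.
ast = sqlglot.parse_one("SELECT x::INT AS n FROM t WHERE y > 1")
assert isinstance(ast, exp.Select)
print(ast.find(exp.Cast).sql())  # CAST(x AS INT)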
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TIME, 160 TokenType.TIMETZ, 161 TokenType.TIMESTAMP, 162 TokenType.TIMESTAMPTZ, 163 TokenType.TIMESTAMPLTZ, 164 TokenType.DATETIME, 165 TokenType.DATETIME64, 166 TokenType.DATE, 167 TokenType.INT4RANGE, 168 TokenType.INT4MULTIRANGE, 169 TokenType.INT8RANGE, 170 TokenType.INT8MULTIRANGE, 171 TokenType.NUMRANGE, 172 TokenType.NUMMULTIRANGE, 173 TokenType.TSRANGE, 174 TokenType.TSMULTIRANGE, 175 TokenType.TSTZRANGE, 176 TokenType.TSTZMULTIRANGE, 177 TokenType.DATERANGE, 178 TokenType.DATEMULTIRANGE, 179 TokenType.DECIMAL, 180 TokenType.BIGDECIMAL, 181 TokenType.UUID, 182 TokenType.GEOGRAPHY, 183 TokenType.GEOMETRY, 184 TokenType.HLLSKETCH, 185 TokenType.HSTORE, 186 TokenType.PSEUDO_TYPE, 187 TokenType.SUPER, 188 TokenType.SERIAL, 189 TokenType.SMALLSERIAL, 190 TokenType.BIGSERIAL, 191 TokenType.XML, 192 TokenType.YEAR, 193 
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
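    # Illustrative note (not part of the upstream source): COLUMN_OPERATORS is
    # consulted after a column has been parsed, so e.g. `x::int` hits the
    # DCOLON entry and becomes a Cast (or TryCast when STRICT_CAST is False):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("SELECT x::int")
    #     assert isinstance(node.expressions[0], exp.Cast)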
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
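    # Illustrative note (not part of the upstream source): _parse_statement
    # dispatches on a statement's first token through STATEMENT_PARSERS, so
    # the leading DROP token of "DROP TABLE t" routes to _parse_drop. A
    # dialect can register extra statement parsers the same way dialects
    # extend FUNCTIONS above.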
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
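    # Illustrative note (not part of the upstream source): RANGE_PARSERS
    # receive the already-parsed left-hand side, so "x LIKE 'a%'" parses x
    # first and then hands it to binary_range_parser(exp.Like), which also
    # picks up a trailing ESCAPE clause:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("SELECT * FROM t WHERE x LIKE 'a%'")
    #     assert isinstance(node.args["where"].this, exp.Like)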
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
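    # Illustrative note (not part of the upstream source): PROPERTY_PARSERS is
    # keyed by uppercase keyword sequences matched via _match_texts, so the
    # ENGINE entry turns "CREATE TABLE t (a INT) ENGINE=InnoDB" into an
    # exp.EngineProperty inside the statement's exp.Properties node.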
"DEFAULT": lambda self: self.expression( 708 exp.DefaultColumnConstraint, this=self._parse_bitwise() 709 ), 710 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 711 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 712 "FORMAT": lambda self: self.expression( 713 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 714 ), 715 "GENERATED": lambda self: self._parse_generated_as_identity(), 716 "IDENTITY": lambda self: self._parse_auto_increment(), 717 "INLINE": lambda self: self._parse_inline(), 718 "LIKE": lambda self: self._parse_create_like(), 719 "NOT": lambda self: self._parse_not_constraint(), 720 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 721 "ON": lambda self: ( 722 self._match(TokenType.UPDATE) 723 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 724 ) 725 or self.expression(exp.OnProperty, this=self._parse_id_var()), 726 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 727 "PRIMARY KEY": lambda self: self._parse_primary_key(), 728 "REFERENCES": lambda self: self._parse_references(match=False), 729 "TITLE": lambda self: self.expression( 730 exp.TitleColumnConstraint, this=self._parse_var_or_string() 731 ), 732 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 733 "UNIQUE": lambda self: self._parse_unique(), 734 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 735 "WITH": lambda self: self.expression( 736 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 737 ), 738 } 739 740 ALTER_PARSERS = { 741 "ADD": lambda self: self._parse_alter_table_add(), 742 "ALTER": lambda self: self._parse_alter_table_alter(), 743 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 744 "DROP": lambda self: self._parse_alter_table_drop(), 745 "RENAME": lambda self: self._parse_alter_table_rename(), 746 } 747 748 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 749 750 NO_PAREN_FUNCTION_PARSERS = { 751 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 752 "CASE": lambda self: self._parse_case(), 753 "IF": lambda self: self._parse_if(), 754 "NEXT": lambda self: self._parse_next_value_for(), 755 } 756 757 INVALID_FUNC_NAME_TOKENS = { 758 TokenType.IDENTIFIER, 759 TokenType.STRING, 760 } 761 762 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 763 764 FUNCTION_PARSERS = { 765 "ANY_VALUE": lambda self: self._parse_any_value(), 766 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 767 "CONCAT": lambda self: self._parse_concat(), 768 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 769 "DECODE": lambda self: self._parse_decode(), 770 "EXTRACT": lambda self: self._parse_extract(), 771 "JSON_OBJECT": lambda self: self._parse_json_object(), 772 "LOG": lambda self: self._parse_logarithm(), 773 "MATCH": lambda self: self._parse_match_against(), 774 "OPENJSON": lambda self: self._parse_open_json(), 775 "POSITION": lambda self: self._parse_position(), 776 "SAFE_CAST": lambda self: self._parse_cast(False), 777 "STRING_AGG": lambda self: self._parse_string_agg(), 778 "SUBSTRING": lambda self: self._parse_substring(), 779 "TRIM": lambda self: self._parse_trim(), 780 "TRY_CAST": lambda self: self._parse_cast(False), 781 "TRY_CONVERT": lambda self: self._parse_convert(False), 782 } 783 784 QUERY_MODIFIER_PARSERS = { 785 TokenType.MATCH_RECOGNIZE: lambda self: 
("match", self._parse_match_recognize()), 786 TokenType.WHERE: lambda self: ("where", self._parse_where()), 787 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 788 TokenType.HAVING: lambda self: ("having", self._parse_having()), 789 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 790 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 791 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 792 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 793 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 794 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 795 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 796 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 797 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 798 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 799 TokenType.CLUSTER_BY: lambda self: ( 800 "cluster", 801 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 802 ), 803 TokenType.DISTRIBUTE_BY: lambda self: ( 804 "distribute", 805 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 806 ), 807 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 808 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 809 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 810 } 811 812 SET_PARSERS = { 813 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 814 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 815 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 816 "TRANSACTION": lambda self: self._parse_set_transaction(), 817 } 818 819 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 820 821 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 822 823 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 824 825 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 826 827 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 828 829 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 830 TRANSACTION_CHARACTERISTICS = { 831 "ISOLATION LEVEL REPEATABLE READ", 832 "ISOLATION LEVEL READ COMMITTED", 833 "ISOLATION LEVEL READ UNCOMMITTED", 834 "ISOLATION LEVEL SERIALIZABLE", 835 "READ WRITE", 836 "READ ONLY", 837 } 838 839 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 840 841 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 842 843 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 844 845 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 846 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 847 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 848 849 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 850 851 DISTINCT_TOKENS = {TokenType.DISTINCT} 852 853 STRICT_CAST = True 854 855 # A NULL arg in CONCAT yields NULL by default 856 CONCAT_NULL_OUTPUTS_STRING = False 857 858 PREFIXED_PIVOT_COLUMNS = False 859 IDENTIFY_PIVOT_STRINGS = False 860 861 LOG_BASE_FIRST = True 862 LOG_DEFAULTS_TO_LN = False 863 864 SUPPORTS_USER_DEFINED_TYPES = True 865 866 # Whether or not ADD is present for each column added by ALTER TABLE 867 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 868 869 # Whether or not the table sample clause expects CSV syntax 870 TABLESAMPLE_CSV = False 871 872 __slots__ = ( 873 "error_level", 874 
"error_message_context", 875 "max_errors", 876 "sql", 877 "errors", 878 "_tokens", 879 "_index", 880 "_curr", 881 "_next", 882 "_prev", 883 "_prev_comments", 884 "_tokenizer", 885 ) 886 887 # Autofilled 888 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 889 INDEX_OFFSET: int = 0 890 UNNEST_COLUMN_ONLY: bool = False 891 ALIAS_POST_TABLESAMPLE: bool = False 892 STRICT_STRING_CONCAT = False 893 NORMALIZE_FUNCTIONS = "upper" 894 NULL_ORDERING: str = "nulls_are_small" 895 SHOW_TRIE: t.Dict = {} 896 SET_TRIE: t.Dict = {} 897 FORMAT_MAPPING: t.Dict[str, str] = {} 898 FORMAT_TRIE: t.Dict = {} 899 TIME_MAPPING: t.Dict[str, str] = {} 900 TIME_TRIE: t.Dict = {} 901 902 def __init__( 903 self, 904 error_level: t.Optional[ErrorLevel] = None, 905 error_message_context: int = 100, 906 max_errors: int = 3, 907 ): 908 self.error_level = error_level or ErrorLevel.IMMEDIATE 909 self.error_message_context = error_message_context 910 self.max_errors = max_errors 911 self._tokenizer = self.TOKENIZER_CLASS() 912 self.reset() 913 914 def reset(self): 915 self.sql = "" 916 self.errors = [] 917 self._tokens = [] 918 self._index = 0 919 self._curr = None 920 self._next = None 921 self._prev = None 922 self._prev_comments = None 923 924 def parse( 925 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 926 ) -> t.List[t.Optional[exp.Expression]]: 927 """ 928 Parses a list of tokens and returns a list of syntax trees, one tree 929 per parsed SQL statement. 930 931 Args: 932 raw_tokens: The list of tokens. 933 sql: The original SQL string, used to produce helpful debug messages. 934 935 Returns: 936 The list of the produced syntax trees. 937 """ 938 return self._parse( 939 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 940 ) 941 942 def parse_into( 943 self, 944 expression_types: exp.IntoType, 945 raw_tokens: t.List[Token], 946 sql: t.Optional[str] = None, 947 ) -> t.List[t.Optional[exp.Expression]]: 948 """ 949 Parses a list of tokens into a given Expression type. If a collection of Expression 950 types is given instead, this method will try to parse the token list into each one 951 of them, stopping at the first for which the parsing succeeds. 952 953 Args: 954 expression_types: The expression type(s) to try and parse the token list into. 955 raw_tokens: The list of tokens. 956 sql: The original SQL string, used to produce helpful debug messages. 957 958 Returns: 959 The target Expression. 
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
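    # Illustrative usage sketch (not part of the upstream source): with
    # ErrorLevel.RAISE, errors accumulate and surface as one ParseError per
    # statement instead of raising immediately:
    #
    #     from sqlglot.errors import ErrorLevel, ParseError
    #     from sqlglot.parser import Parser
    #     from sqlglot.tokens import Tokenizer
    #
    #     sql = "SELECT * FROM"
    #     try:
    #         Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(sql), sql)
    #     except ParseError as e:
    #         print(e.errors)  # structured error dicts with line/col/context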
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)
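    # Illustrative note (not part of the upstream source): _advance/_retreat
    # move the token cursor while keeping _curr/_next/_prev in sync; parsing
    # helpers speculatively _advance and then _retreat to a saved _index when
    # a speculative match fails (see _parse_copy_property below).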
    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
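    # Illustrative usage sketch (not part of the upstream source):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("DROP TABLE IF EXISTS t CASCADE")
    #     assert isinstance(node, exp.Drop)
    #     assert node.args["exists"] is True and node.args["cascade"] is True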
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None
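    # Illustrative usage sketch (not part of the upstream source):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("CREATE TABLE IF NOT EXISTS t (a INT)")
    #     assert isinstance(node, exp.Create) and node.args["kind"] == "TABLE"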
    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(
                exp.Property,
                this=key,
                value=self._parse_column() or self._parse_var(any_token=True),
            )

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()
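    # Illustrative note (not part of the upstream source): _parse_properties
    # keeps calling _parse_property until no keyword matches, collecting the
    # results into a single exp.Properties node; generic KEY=value pairs that
    # no PROPERTY_PARSERS entry claims fall through to a plain exp.Property.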
    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )
    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))
    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )
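    # Illustrative usage sketch (not part of the upstream source):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("INSERT INTO t (a) VALUES (1)")
    #     assert isinstance(node, exp.Insert)
    #     assert isinstance(node.expression, exp.Values)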
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
1921 return self._parse_as_command(self._prev) 1922 1923 def _parse_delete(self) -> exp.Delete: 1924 # This handles MySQL's "Multiple-Table Syntax" 1925 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1926 tables = None 1927 comments = self._prev_comments 1928 if not self._match(TokenType.FROM, advance=False): 1929 tables = self._parse_csv(self._parse_table) or None 1930 1931 returning = self._parse_returning() 1932 1933 return self.expression( 1934 exp.Delete, 1935 comments=comments, 1936 tables=tables, 1937 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1938 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1939 where=self._parse_where(), 1940 returning=returning or self._parse_returning(), 1941 limit=self._parse_limit(), 1942 ) 1943 1944 def _parse_update(self) -> exp.Update: 1945 comments = self._prev_comments 1946 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1947 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1948 returning = self._parse_returning() 1949 return self.expression( 1950 exp.Update, 1951 comments=comments, 1952 **{ # type: ignore 1953 "this": this, 1954 "expressions": expressions, 1955 "from": self._parse_from(joins=True), 1956 "where": self._parse_where(), 1957 "returning": returning or self._parse_returning(), 1958 "order": self._parse_order(), 1959 "limit": self._parse_limit(), 1960 }, 1961 ) 1962 1963 def _parse_uncache(self) -> exp.Uncache: 1964 if not self._match(TokenType.TABLE): 1965 self.raise_error("Expecting TABLE after UNCACHE") 1966 1967 return self.expression( 1968 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1969 ) 1970 1971 def _parse_cache(self) -> exp.Cache: 1972 lazy = self._match_text_seq("LAZY") 1973 self._match(TokenType.TABLE) 1974 table = self._parse_table(schema=True) 1975 1976 options = [] 1977 if self._match_text_seq("OPTIONS"): 1978 self._match_l_paren() 1979 k = self._parse_string() 1980 self._match(TokenType.EQ) 1981 v = self._parse_string() 1982 options = [k, v] 1983 self._match_r_paren() 1984 1985 self._match(TokenType.ALIAS) 1986 return self.expression( 1987 exp.Cache, 1988 this=table, 1989 lazy=lazy, 1990 options=options, 1991 expression=self._parse_select(nested=True), 1992 ) 1993 1994 def _parse_partition(self) -> t.Optional[exp.Partition]: 1995 if not self._match(TokenType.PARTITION): 1996 return None 1997 1998 return self.expression( 1999 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2000 ) 2001 2002 def _parse_value(self) -> exp.Tuple: 2003 if self._match(TokenType.L_PAREN): 2004 expressions = self._parse_csv(self._parse_conjunction) 2005 self._match_r_paren() 2006 return self.expression(exp.Tuple, expressions=expressions) 2007 2008 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
2009 # https://prestodb.io/docs/current/sql/values.html 2010 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2011 2012 def _parse_projections(self) -> t.List[exp.Expression]: 2013 return self._parse_expressions() 2014 2015 def _parse_select( 2016 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2017 ) -> t.Optional[exp.Expression]: 2018 cte = self._parse_with() 2019 2020 if cte: 2021 this = self._parse_statement() 2022 2023 if not this: 2024 self.raise_error("Failed to parse any statement following CTE") 2025 return cte 2026 2027 if "with" in this.arg_types: 2028 this.set("with", cte) 2029 else: 2030 self.raise_error(f"{this.key} does not support CTE") 2031 this = cte 2032 2033 return this 2034 2035 # duckdb supports leading with FROM x 2036 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2037 2038 if self._match(TokenType.SELECT): 2039 comments = self._prev_comments 2040 2041 hint = self._parse_hint() 2042 all_ = self._match(TokenType.ALL) 2043 distinct = self._match_set(self.DISTINCT_TOKENS) 2044 2045 kind = ( 2046 self._match(TokenType.ALIAS) 2047 and self._match_texts(("STRUCT", "VALUE")) 2048 and self._prev.text 2049 ) 2050 2051 if distinct: 2052 distinct = self.expression( 2053 exp.Distinct, 2054 on=self._parse_value() if self._match(TokenType.ON) else None, 2055 ) 2056 2057 if all_ and distinct: 2058 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2059 2060 limit = self._parse_limit(top=True) 2061 projections = self._parse_projections() 2062 2063 this = self.expression( 2064 exp.Select, 2065 kind=kind, 2066 hint=hint, 2067 distinct=distinct, 2068 expressions=projections, 2069 limit=limit, 2070 ) 2071 this.comments = comments 2072 2073 into = self._parse_into() 2074 if into: 2075 this.set("into", into) 2076 2077 if not from_: 2078 from_ = self._parse_from() 2079 2080 if from_: 2081 this.set("from", from_) 2082 2083 this = self._parse_query_modifiers(this) 2084 elif (table or nested) and self._match(TokenType.L_PAREN): 2085 if self._match(TokenType.PIVOT): 2086 this = self._parse_simplified_pivot() 2087 elif self._match(TokenType.FROM): 2088 this = exp.select("*").from_( 2089 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2090 ) 2091 else: 2092 this = self._parse_table() if table else self._parse_select(nested=True) 2093 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2094 2095 self._match_r_paren() 2096 2097 # We return early here so that the UNION isn't attached to the subquery by the 2098 # following call to _parse_set_operations, but instead becomes the parent node 2099 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2100 elif self._match(TokenType.VALUES): 2101 this = self.expression( 2102 exp.Values, 2103 expressions=self._parse_csv(self._parse_value), 2104 alias=self._parse_table_alias(), 2105 ) 2106 elif from_: 2107 this = exp.select("*").from_(from_.this, copy=False) 2108 else: 2109 this = None 2110 2111 return self._parse_set_operations(this) 2112 2113 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2114 if not skip_with_token and not self._match(TokenType.WITH): 2115 return None 2116 2117 comments = self._prev_comments 2118 recursive = self._match(TokenType.RECURSIVE) 2119 2120 expressions = [] 2121 while True: 2122 expressions.append(self._parse_cte()) 2123 2124 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2125 break 2126 else: 2127 
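                # The separator just consumed was either a comma or a standalone WITH;
                # after a comma an optional redundant WITH is also swallowed, so both a
                # standard chain like "WITH a AS (SELECT 1), b AS (SELECT 2) SELECT * FROM b"
                # and the more lenient ", WITH" form keep parsing.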
self._match(TokenType.WITH) 2128 2129 return self.expression( 2130 exp.With, comments=comments, expressions=expressions, recursive=recursive 2131 ) 2132 2133 def _parse_cte(self) -> exp.CTE: 2134 alias = self._parse_table_alias() 2135 if not alias or not alias.this: 2136 self.raise_error("Expected CTE to have alias") 2137 2138 self._match(TokenType.ALIAS) 2139 return self.expression( 2140 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2141 ) 2142 2143 def _parse_table_alias( 2144 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2145 ) -> t.Optional[exp.TableAlias]: 2146 any_token = self._match(TokenType.ALIAS) 2147 alias = ( 2148 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2149 or self._parse_string_as_identifier() 2150 ) 2151 2152 index = self._index 2153 if self._match(TokenType.L_PAREN): 2154 columns = self._parse_csv(self._parse_function_parameter) 2155 self._match_r_paren() if columns else self._retreat(index) 2156 else: 2157 columns = None 2158 2159 if not alias and not columns: 2160 return None 2161 2162 return self.expression(exp.TableAlias, this=alias, columns=columns) 2163 2164 def _parse_subquery( 2165 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2166 ) -> t.Optional[exp.Subquery]: 2167 if not this: 2168 return None 2169 2170 return self.expression( 2171 exp.Subquery, 2172 this=this, 2173 pivots=self._parse_pivots(), 2174 alias=self._parse_table_alias() if parse_alias else None, 2175 ) 2176 2177 def _parse_query_modifiers( 2178 self, this: t.Optional[exp.Expression] 2179 ) -> t.Optional[exp.Expression]: 2180 if isinstance(this, self.MODIFIABLES): 2181 for join in iter(self._parse_join, None): 2182 this.append("joins", join) 2183 for lateral in iter(self._parse_lateral, None): 2184 this.append("laterals", lateral) 2185 2186 while True: 2187 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2188 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2189 key, expression = parser(self) 2190 2191 if expression: 2192 this.set(key, expression) 2193 if key == "limit": 2194 offset = expression.args.pop("offset", None) 2195 if offset: 2196 this.set("offset", exp.Offset(expression=offset)) 2197 continue 2198 break 2199 return this 2200 2201 def _parse_hint(self) -> t.Optional[exp.Hint]: 2202 if self._match(TokenType.HINT): 2203 hints = [] 2204 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2205 hints.extend(hint) 2206 2207 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2208 self.raise_error("Expected */ after HINT") 2209 2210 return self.expression(exp.Hint, expressions=hints) 2211 2212 return None 2213 2214 def _parse_into(self) -> t.Optional[exp.Into]: 2215 if not self._match(TokenType.INTO): 2216 return None 2217 2218 temp = self._match(TokenType.TEMPORARY) 2219 unlogged = self._match_text_seq("UNLOGGED") 2220 self._match(TokenType.TABLE) 2221 2222 return self.expression( 2223 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2224 ) 2225 2226 def _parse_from( 2227 self, joins: bool = False, skip_from_token: bool = False 2228 ) -> t.Optional[exp.From]: 2229 if not skip_from_token and not self._match(TokenType.FROM): 2230 return None 2231 2232 return self.expression( 2233 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2234 ) 2235 2236 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2237 if not self._match(TokenType.MATCH_RECOGNIZE): 2238 return None 2239 2240 
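        # Row-pattern recognition clause. The overall shape parsed below is
        # (illustrative, Oracle/Snowflake-style):
        #
        #   SELECT * FROM t MATCH_RECOGNIZE (
        #       PARTITION BY id ORDER BY ts
        #       MEASURES FIRST(ts) AS start_ts
        #       ONE ROW PER MATCH
        #       AFTER MATCH SKIP PAST LAST ROW
        #       PATTERN (a b+)
        #       DEFINE b AS price > PREV(price)
        #   ) AS m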
        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif
isinstance(this, exp.Subquery) and this.alias: 2360 # Ensures parity between the Subquery's and the Lateral's "alias" args 2361 table_alias = this.args["alias"].copy() 2362 else: 2363 table_alias = self._parse_table_alias() 2364 2365 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2366 2367 def _parse_join_parts( 2368 self, 2369 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2370 return ( 2371 self._match_set(self.JOIN_METHODS) and self._prev, 2372 self._match_set(self.JOIN_SIDES) and self._prev, 2373 self._match_set(self.JOIN_KINDS) and self._prev, 2374 ) 2375 2376 def _parse_join( 2377 self, skip_join_token: bool = False, parse_bracket: bool = False 2378 ) -> t.Optional[exp.Join]: 2379 if self._match(TokenType.COMMA): 2380 return self.expression(exp.Join, this=self._parse_table()) 2381 2382 index = self._index 2383 method, side, kind = self._parse_join_parts() 2384 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2385 join = self._match(TokenType.JOIN) 2386 2387 if not skip_join_token and not join: 2388 self._retreat(index) 2389 kind = None 2390 method = None 2391 side = None 2392 2393 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2394 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2395 2396 if not skip_join_token and not join and not outer_apply and not cross_apply: 2397 return None 2398 2399 if outer_apply: 2400 side = Token(TokenType.LEFT, "LEFT") 2401 2402 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2403 2404 if method: 2405 kwargs["method"] = method.text 2406 if side: 2407 kwargs["side"] = side.text 2408 if kind: 2409 kwargs["kind"] = kind.text 2410 if hint: 2411 kwargs["hint"] = hint 2412 2413 if self._match(TokenType.ON): 2414 kwargs["on"] = self._parse_conjunction() 2415 elif self._match(TokenType.USING): 2416 kwargs["using"] = self._parse_wrapped_id_vars() 2417 elif not (kind and kind.token_type == TokenType.CROSS): 2418 index = self._index 2419 joins = self._parse_joins() 2420 2421 if joins and self._match(TokenType.ON): 2422 kwargs["on"] = self._parse_conjunction() 2423 elif joins and self._match(TokenType.USING): 2424 kwargs["using"] = self._parse_wrapped_id_vars() 2425 else: 2426 joins = None 2427 self._retreat(index) 2428 2429 kwargs["this"].set("joins", joins) 2430 2431 comments = [c for token in (method, side, kind) if token for c in token.comments] 2432 return self.expression(exp.Join, comments=comments, **kwargs) 2433 2434 def _parse_index( 2435 self, 2436 index: t.Optional[exp.Expression] = None, 2437 ) -> t.Optional[exp.Index]: 2438 if index: 2439 unique = None 2440 primary = None 2441 amp = None 2442 2443 self._match(TokenType.ON) 2444 self._match(TokenType.TABLE) # hive 2445 table = self._parse_table_parts(schema=True) 2446 else: 2447 unique = self._match(TokenType.UNIQUE) 2448 primary = self._match_text_seq("PRIMARY") 2449 amp = self._match_text_seq("AMP") 2450 2451 if not self._match(TokenType.INDEX): 2452 return None 2453 2454 index = self._parse_id_var() 2455 table = None 2456 2457 using = self._parse_field() if self._match(TokenType.USING) else None 2458 2459 if self._match(TokenType.L_PAREN, advance=False): 2460 columns = self._parse_wrapped_csv(self._parse_ordered) 2461 else: 2462 columns = None 2463 2464 return self.expression( 2465 exp.Index, 2466 this=index, 2467 table=table, 2468 using=using, 2469 columns=columns, 2470 unique=unique, 2471 primary=primary, 2472 amp=amp, 2473 
partition_by=self._parse_partition_by(), 2474 ) 2475 2476 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2477 hints: t.List[exp.Expression] = [] 2478 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2479 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2480 hints.append( 2481 self.expression( 2482 exp.WithTableHint, 2483 expressions=self._parse_csv( 2484 lambda: self._parse_function() or self._parse_var(any_token=True) 2485 ), 2486 ) 2487 ) 2488 self._match_r_paren() 2489 else: 2490 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2491 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2492 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2493 2494 self._match_texts({"INDEX", "KEY"}) 2495 if self._match(TokenType.FOR): 2496 hint.set("target", self._advance_any() and self._prev.text.upper()) 2497 2498 hint.set("expressions", self._parse_wrapped_id_vars()) 2499 hints.append(hint) 2500 2501 return hints or None 2502 2503 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2504 return ( 2505 (not schema and self._parse_function(optional_parens=False)) 2506 or self._parse_id_var(any_token=False) 2507 or self._parse_string_as_identifier() 2508 or self._parse_placeholder() 2509 ) 2510 2511 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2512 catalog = None 2513 db = None 2514 table = self._parse_table_part(schema=schema) 2515 2516 while self._match(TokenType.DOT): 2517 if catalog: 2518 # This allows nesting the table in arbitrarily many dot expressions if needed 2519 table = self.expression( 2520 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2521 ) 2522 else: 2523 catalog = db 2524 db = table 2525 table = self._parse_table_part(schema=schema) 2526 2527 if not table: 2528 self.raise_error(f"Expected table name but got {self._curr}") 2529 2530 return self.expression( 2531 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2532 ) 2533 2534 def _parse_table( 2535 self, 2536 schema: bool = False, 2537 joins: bool = False, 2538 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2539 parse_bracket: bool = False, 2540 ) -> t.Optional[exp.Expression]: 2541 lateral = self._parse_lateral() 2542 if lateral: 2543 return lateral 2544 2545 unnest = self._parse_unnest() 2546 if unnest: 2547 return unnest 2548 2549 values = self._parse_derived_table_values() 2550 if values: 2551 return values 2552 2553 subquery = self._parse_select(table=True) 2554 if subquery: 2555 if not subquery.args.get("pivots"): 2556 subquery.set("pivots", self._parse_pivots()) 2557 return subquery 2558 2559 bracket = parse_bracket and self._parse_bracket(None) 2560 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2561 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2562 2563 if schema: 2564 return self._parse_schema(this=this) 2565 2566 version = self._parse_version() 2567 2568 if version: 2569 this.set("version", version) 2570 2571 if self.ALIAS_POST_TABLESAMPLE: 2572 table_sample = self._parse_table_sample() 2573 2574 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2575 if alias: 2576 this.set("alias", alias) 2577 2578 this.set("hints", self._parse_table_hints()) 2579 2580 if not this.args.get("pivots"): 2581 this.set("pivots", self._parse_pivots()) 2582 2583 if not self.ALIAS_POST_TABLESAMPLE: 2584 table_sample = self._parse_table_sample() 2585 2586 if 
table_sample: 2587 table_sample.set("this", this) 2588 this = table_sample 2589 2590 if joins: 2591 for join in iter(self._parse_join, None): 2592 this.append("joins", join) 2593 2594 return this 2595 2596 def _parse_version(self) -> t.Optional[exp.Version]: 2597 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2598 this = "TIMESTAMP" 2599 elif self._match(TokenType.VERSION_SNAPSHOT): 2600 this = "VERSION" 2601 else: 2602 return None 2603 2604 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2605 kind = self._prev.text.upper() 2606 start = self._parse_bitwise() 2607 self._match_texts(("TO", "AND")) 2608 end = self._parse_bitwise() 2609 expression: t.Optional[exp.Expression] = self.expression( 2610 exp.Tuple, expressions=[start, end] 2611 ) 2612 elif self._match_text_seq("CONTAINED", "IN"): 2613 kind = "CONTAINED IN" 2614 expression = self.expression( 2615 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2616 ) 2617 elif self._match(TokenType.ALL): 2618 kind = "ALL" 2619 expression = None 2620 else: 2621 self._match_text_seq("AS", "OF") 2622 kind = "AS OF" 2623 expression = self._parse_type() 2624 2625 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2626 2627 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2628 if not self._match(TokenType.UNNEST): 2629 return None 2630 2631 expressions = self._parse_wrapped_csv(self._parse_type) 2632 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2633 2634 alias = self._parse_table_alias() if with_alias else None 2635 2636 if alias and self.UNNEST_COLUMN_ONLY: 2637 if alias.args.get("columns"): 2638 self.raise_error("Unexpected extra column alias in unnest.") 2639 2640 alias.set("columns", [alias.this]) 2641 alias.set("this", None) 2642 2643 offset = None 2644 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2645 self._match(TokenType.ALIAS) 2646 offset = self._parse_id_var() or exp.to_identifier("offset") 2647 2648 return self.expression( 2649 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2650 ) 2651 2652 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2653 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2654 if not is_derived and not self._match(TokenType.VALUES): 2655 return None 2656 2657 expressions = self._parse_csv(self._parse_value) 2658 alias = self._parse_table_alias() 2659 2660 if is_derived: 2661 self._match_r_paren() 2662 2663 return self.expression( 2664 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2665 ) 2666 2667 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2668 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2669 as_modifier and self._match_text_seq("USING", "SAMPLE") 2670 ): 2671 return None 2672 2673 bucket_numerator = None 2674 bucket_denominator = None 2675 bucket_field = None 2676 percent = None 2677 rows = None 2678 size = None 2679 seed = None 2680 2681 kind = ( 2682 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2683 ) 2684 method = self._parse_var(tokens=(TokenType.ROW,)) 2685 2686 self._match(TokenType.L_PAREN) 2687 2688 if self.TABLESAMPLE_CSV: 2689 num = None 2690 expressions = self._parse_csv(self._parse_primary) 2691 else: 2692 expressions = None 2693 num = self._parse_number() 2694 2695 if self._match_text_seq("BUCKET"): 2696 bucket_numerator = self._parse_number() 2697 self._match_text_seq("OUT", "OF") 2698 
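            # Hive-style bucket sampling: in, e.g., TABLESAMPLE (BUCKET 3 OUT OF 16 ON id),
            # the numerator 3 was parsed above; the denominator 16 and the bucketing
            # field id are parsed next.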
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in
pivot.args["field"].expressions: 2813 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2814 for name in names: 2815 if self.PREFIXED_PIVOT_COLUMNS: 2816 name = f"{name}_{field_name}" if name else field_name 2817 else: 2818 name = f"{field_name}_{name}" if name else field_name 2819 2820 columns.append(exp.to_identifier(name)) 2821 2822 pivot.set("columns", columns) 2823 2824 return pivot 2825 2826 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2827 return [agg.alias for agg in aggregations] 2828 2829 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2830 if not skip_where_token and not self._match(TokenType.WHERE): 2831 return None 2832 2833 return self.expression( 2834 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2835 ) 2836 2837 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2838 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2839 return None 2840 2841 elements = defaultdict(list) 2842 2843 if self._match(TokenType.ALL): 2844 return self.expression(exp.Group, all=True) 2845 2846 while True: 2847 expressions = self._parse_csv(self._parse_conjunction) 2848 if expressions: 2849 elements["expressions"].extend(expressions) 2850 2851 grouping_sets = self._parse_grouping_sets() 2852 if grouping_sets: 2853 elements["grouping_sets"].extend(grouping_sets) 2854 2855 rollup = None 2856 cube = None 2857 totals = None 2858 2859 with_ = self._match(TokenType.WITH) 2860 if self._match(TokenType.ROLLUP): 2861 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2862 elements["rollup"].extend(ensure_list(rollup)) 2863 2864 if self._match(TokenType.CUBE): 2865 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2866 elements["cube"].extend(ensure_list(cube)) 2867 2868 if self._match_text_seq("TOTALS"): 2869 totals = True 2870 elements["totals"] = True # type: ignore 2871 2872 if not (grouping_sets or rollup or cube or totals): 2873 break 2874 2875 return self.expression(exp.Group, **elements) # type: ignore 2876 2877 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2878 if not self._match(TokenType.GROUPING_SETS): 2879 return None 2880 2881 return self._parse_wrapped_csv(self._parse_grouping_set) 2882 2883 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2884 if self._match(TokenType.L_PAREN): 2885 grouping_set = self._parse_csv(self._parse_column) 2886 self._match_r_paren() 2887 return self.expression(exp.Tuple, expressions=grouping_set) 2888 2889 return self._parse_column() 2890 2891 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2892 if not skip_having_token and not self._match(TokenType.HAVING): 2893 return None 2894 return self.expression(exp.Having, this=self._parse_conjunction()) 2895 2896 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2897 if not self._match(TokenType.QUALIFY): 2898 return None 2899 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2900 2901 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2902 if skip_start_token: 2903 start = None 2904 elif self._match(TokenType.START_WITH): 2905 start = self._parse_conjunction() 2906 else: 2907 return None 2908 2909 self._match(TokenType.CONNECT_BY) 2910 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2911 exp.Prior, this=self._parse_bitwise() 2912 ) 2913 connect = self._parse_conjunction() 2914 
self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2915 return self.expression(exp.Connect, start=start, connect=connect) 2916 2917 def _parse_order( 2918 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2919 ) -> t.Optional[exp.Expression]: 2920 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2921 return this 2922 2923 return self.expression( 2924 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2925 ) 2926 2927 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2928 if not self._match(token): 2929 return None 2930 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2931 2932 def _parse_ordered(self) -> exp.Ordered: 2933 this = self._parse_conjunction() 2934 self._match(TokenType.ASC) 2935 2936 is_desc = self._match(TokenType.DESC) 2937 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2938 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2939 desc = is_desc or False 2940 asc = not desc 2941 nulls_first = is_nulls_first or False 2942 explicitly_null_ordered = is_nulls_first or is_nulls_last 2943 2944 if ( 2945 not explicitly_null_ordered 2946 and ( 2947 (asc and self.NULL_ORDERING == "nulls_are_small") 2948 or (desc and self.NULL_ORDERING != "nulls_are_small") 2949 ) 2950 and self.NULL_ORDERING != "nulls_are_last" 2951 ): 2952 nulls_first = True 2953 2954 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2955 2956 def _parse_limit( 2957 self, this: t.Optional[exp.Expression] = None, top: bool = False 2958 ) -> t.Optional[exp.Expression]: 2959 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2960 comments = self._prev_comments 2961 if top: 2962 limit_paren = self._match(TokenType.L_PAREN) 2963 expression = self._parse_number() 2964 2965 if limit_paren: 2966 self._match_r_paren() 2967 else: 2968 expression = self._parse_term() 2969 2970 if self._match(TokenType.COMMA): 2971 offset = expression 2972 expression = self._parse_term() 2973 else: 2974 offset = None 2975 2976 limit_exp = self.expression( 2977 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2978 ) 2979 2980 return limit_exp 2981 2982 if self._match(TokenType.FETCH): 2983 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2984 direction = self._prev.text if direction else "FIRST" 2985 2986 count = self._parse_number() 2987 percent = self._match(TokenType.PERCENT) 2988 2989 self._match_set((TokenType.ROW, TokenType.ROWS)) 2990 2991 only = self._match_text_seq("ONLY") 2992 with_ties = self._match_text_seq("WITH", "TIES") 2993 2994 if only and with_ties: 2995 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2996 2997 return self.expression( 2998 exp.Fetch, 2999 direction=direction, 3000 count=count, 3001 percent=percent, 3002 with_ties=with_ties, 3003 ) 3004 3005 return this 3006 3007 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3008 if not self._match(TokenType.OFFSET): 3009 return this 3010 3011 count = self._parse_term() 3012 self._match_set((TokenType.ROW, TokenType.ROWS)) 3013 return self.expression(exp.Offset, this=this, expression=count) 3014 3015 def _parse_locks(self) -> t.List[exp.Lock]: 3016 locks = [] 3017 while True: 3018 if self._match_text_seq("FOR", "UPDATE"): 3019 update = True 3020 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3021 "LOCK", "IN", "SHARE", "MODE" 3022 ): 3023 update = False 3024 else: 3025 
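                # Neither FOR UPDATE nor FOR SHARE / LOCK IN SHARE MODE follows, so
                # there are no more locking clauses to collect.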
break 3026 3027 expressions = None 3028 if self._match_text_seq("OF"): 3029 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3030 3031 wait: t.Optional[bool | exp.Expression] = None 3032 if self._match_text_seq("NOWAIT"): 3033 wait = True 3034 elif self._match_text_seq("WAIT"): 3035 wait = self._parse_primary() 3036 elif self._match_text_seq("SKIP", "LOCKED"): 3037 wait = False 3038 3039 locks.append( 3040 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3041 ) 3042 3043 return locks 3044 3045 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3046 if not self._match_set(self.SET_OPERATIONS): 3047 return this 3048 3049 token_type = self._prev.token_type 3050 3051 if token_type == TokenType.UNION: 3052 expression = exp.Union 3053 elif token_type == TokenType.EXCEPT: 3054 expression = exp.Except 3055 else: 3056 expression = exp.Intersect 3057 3058 return self.expression( 3059 expression, 3060 this=this, 3061 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3062 by_name=self._match_text_seq("BY", "NAME"), 3063 expression=self._parse_set_operations(self._parse_select(nested=True)), 3064 ) 3065 3066 def _parse_expression(self) -> t.Optional[exp.Expression]: 3067 return self._parse_alias(self._parse_conjunction()) 3068 3069 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3070 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3071 3072 def _parse_equality(self) -> t.Optional[exp.Expression]: 3073 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3074 3075 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3076 return self._parse_tokens(self._parse_range, self.COMPARISON) 3077 3078 def _parse_range(self) -> t.Optional[exp.Expression]: 3079 this = self._parse_bitwise() 3080 negate = self._match(TokenType.NOT) 3081 3082 if self._match_set(self.RANGE_PARSERS): 3083 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3084 if not expression: 3085 return this 3086 3087 this = expression 3088 elif self._match(TokenType.ISNULL): 3089 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3090 3091 # Postgres supports ISNULL and NOTNULL for conditions. 
3092 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3093 if self._match(TokenType.NOTNULL): 3094 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3095 this = self.expression(exp.Not, this=this) 3096 3097 if negate: 3098 this = self.expression(exp.Not, this=this) 3099 3100 if self._match(TokenType.IS): 3101 this = self._parse_is(this) 3102 3103 return this 3104 3105 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3106 index = self._index - 1 3107 negate = self._match(TokenType.NOT) 3108 3109 if self._match_text_seq("DISTINCT", "FROM"): 3110 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3111 return self.expression(klass, this=this, expression=self._parse_expression()) 3112 3113 expression = self._parse_null() or self._parse_boolean() 3114 if not expression: 3115 self._retreat(index) 3116 return None 3117 3118 this = self.expression(exp.Is, this=this, expression=expression) 3119 return self.expression(exp.Not, this=this) if negate else this 3120 3121 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3122 unnest = self._parse_unnest(with_alias=False) 3123 if unnest: 3124 this = self.expression(exp.In, this=this, unnest=unnest) 3125 elif self._match(TokenType.L_PAREN): 3126 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3127 3128 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3129 this = self.expression(exp.In, this=this, query=expressions[0]) 3130 else: 3131 this = self.expression(exp.In, this=this, expressions=expressions) 3132 3133 self._match_r_paren(this) 3134 else: 3135 this = self.expression(exp.In, this=this, field=self._parse_field()) 3136 3137 return this 3138 3139 def _parse_between(self, this: exp.Expression) -> exp.Between: 3140 low = self._parse_bitwise() 3141 self._match(TokenType.AND) 3142 high = self._parse_bitwise() 3143 return self.expression(exp.Between, this=this, low=low, high=high) 3144 3145 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3146 if not self._match(TokenType.ESCAPE): 3147 return this 3148 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3149 3150 def _parse_interval(self) -> t.Optional[exp.Interval]: 3151 index = self._index 3152 3153 if not self._match(TokenType.INTERVAL): 3154 return None 3155 3156 if self._match(TokenType.STRING, advance=False): 3157 this = self._parse_primary() 3158 else: 3159 this = self._parse_term() 3160 3161 if not this: 3162 self._retreat(index) 3163 return None 3164 3165 unit = self._parse_function() or self._parse_var(any_token=True) 3166 3167 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3168 # each INTERVAL expression into this canonical form so it's easy to transpile 3169 if this and this.is_number: 3170 this = exp.Literal.string(this.name) 3171 elif this and this.is_string: 3172 parts = this.name.split() 3173 3174 if len(parts) == 2: 3175 if unit: 3176 # This is not actually a unit, it's something else (e.g. 
a "window side") 3177 unit = None 3178 self._retreat(self._index - 1) 3179 3180 this = exp.Literal.string(parts[0]) 3181 unit = self.expression(exp.Var, this=parts[1]) 3182 3183 return self.expression(exp.Interval, this=this, unit=unit) 3184 3185 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3186 this = self._parse_term() 3187 3188 while True: 3189 if self._match_set(self.BITWISE): 3190 this = self.expression( 3191 self.BITWISE[self._prev.token_type], 3192 this=this, 3193 expression=self._parse_term(), 3194 ) 3195 elif self._match(TokenType.DQMARK): 3196 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3197 elif self._match_pair(TokenType.LT, TokenType.LT): 3198 this = self.expression( 3199 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3200 ) 3201 elif self._match_pair(TokenType.GT, TokenType.GT): 3202 this = self.expression( 3203 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3204 ) 3205 else: 3206 break 3207 3208 return this 3209 3210 def _parse_term(self) -> t.Optional[exp.Expression]: 3211 return self._parse_tokens(self._parse_factor, self.TERM) 3212 3213 def _parse_factor(self) -> t.Optional[exp.Expression]: 3214 return self._parse_tokens(self._parse_unary, self.FACTOR) 3215 3216 def _parse_unary(self) -> t.Optional[exp.Expression]: 3217 if self._match_set(self.UNARY_PARSERS): 3218 return self.UNARY_PARSERS[self._prev.token_type](self) 3219 return self._parse_at_time_zone(self._parse_type()) 3220 3221 def _parse_type(self) -> t.Optional[exp.Expression]: 3222 interval = self._parse_interval() 3223 if interval: 3224 return interval 3225 3226 index = self._index 3227 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3228 this = self._parse_column() 3229 3230 if data_type: 3231 if isinstance(this, exp.Literal): 3232 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3233 if parser: 3234 return parser(self, this, data_type) 3235 return self.expression(exp.Cast, this=this, to=data_type) 3236 if not data_type.expressions: 3237 self._retreat(index) 3238 return self._parse_column() 3239 return self._parse_column_ops(data_type) 3240 3241 return this 3242 3243 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3244 this = self._parse_type() 3245 if not this: 3246 return None 3247 3248 return self.expression( 3249 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3250 ) 3251 3252 def _parse_types( 3253 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3254 ) -> t.Optional[exp.Expression]: 3255 index = self._index 3256 3257 prefix = self._match_text_seq("SYSUDTLIB", ".") 3258 3259 if not self._match_set(self.TYPE_TOKENS): 3260 identifier = allow_identifiers and self._parse_id_var( 3261 any_token=False, tokens=(TokenType.VAR,) 3262 ) 3263 3264 if identifier: 3265 tokens = self._tokenizer.tokenize(identifier.name) 3266 3267 if len(tokens) != 1: 3268 self.raise_error("Unexpected identifier", self._prev) 3269 3270 if tokens[0].token_type in self.TYPE_TOKENS: 3271 self._prev = tokens[0] 3272 elif self.SUPPORTS_USER_DEFINED_TYPES: 3273 return identifier 3274 else: 3275 return None 3276 else: 3277 return None 3278 3279 type_token = self._prev.token_type 3280 3281 if type_token == TokenType.PSEUDO_TYPE: 3282 return self.expression(exp.PseudoType, this=self._prev.text) 3283 3284 if type_token == TokenType.OBJECT_IDENTIFIER: 3285 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3286 3287 nested = type_token in 
self.NESTED_TYPE_TOKENS 3288 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3289 expressions = None 3290 maybe_func = False 3291 3292 if self._match(TokenType.L_PAREN): 3293 if is_struct: 3294 expressions = self._parse_csv(self._parse_struct_types) 3295 elif nested: 3296 expressions = self._parse_csv( 3297 lambda: self._parse_types( 3298 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3299 ) 3300 ) 3301 elif type_token in self.ENUM_TYPE_TOKENS: 3302 expressions = self._parse_csv(self._parse_equality) 3303 else: 3304 expressions = self._parse_csv(self._parse_type_size) 3305 3306 if not expressions or not self._match(TokenType.R_PAREN): 3307 self._retreat(index) 3308 return None 3309 3310 maybe_func = True 3311 3312 this: t.Optional[exp.Expression] = None 3313 values: t.Optional[t.List[exp.Expression]] = None 3314 3315 if nested and self._match(TokenType.LT): 3316 if is_struct: 3317 expressions = self._parse_csv(self._parse_struct_types) 3318 else: 3319 expressions = self._parse_csv( 3320 lambda: self._parse_types( 3321 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3322 ) 3323 ) 3324 3325 if not self._match(TokenType.GT): 3326 self.raise_error("Expecting >") 3327 3328 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3329 values = self._parse_csv(self._parse_conjunction) 3330 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3331 3332 if type_token in self.TIMESTAMPS: 3333 if self._match_text_seq("WITH", "TIME", "ZONE"): 3334 maybe_func = False 3335 tz_type = ( 3336 exp.DataType.Type.TIMETZ 3337 if type_token in self.TIMES 3338 else exp.DataType.Type.TIMESTAMPTZ 3339 ) 3340 this = exp.DataType(this=tz_type, expressions=expressions) 3341 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3342 maybe_func = False 3343 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3344 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3345 maybe_func = False 3346 elif type_token == TokenType.INTERVAL: 3347 unit = self._parse_var() 3348 3349 if self._match_text_seq("TO"): 3350 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3351 else: 3352 span = None 3353 3354 if span or not unit: 3355 this = self.expression( 3356 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3357 ) 3358 else: 3359 this = self.expression(exp.Interval, unit=unit) 3360 3361 if maybe_func and check_func: 3362 index2 = self._index 3363 peek = self._parse_string() 3364 3365 if not peek: 3366 self._retreat(index) 3367 return None 3368 3369 self._retreat(index2) 3370 3371 if not this: 3372 if self._match_text_seq("UNSIGNED"): 3373 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3374 if not unsigned_type_token: 3375 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3376 3377 type_token = unsigned_type_token or type_token 3378 3379 this = exp.DataType( 3380 this=exp.DataType.Type[type_token.value], 3381 expressions=expressions, 3382 nested=nested, 3383 values=values, 3384 prefix=prefix, 3385 ) 3386 3387 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3388 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3389 3390 return this 3391 3392 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3393 this = self._parse_type() or self._parse_id_var() 3394 self._match(TokenType.COLON) 3395 return self._parse_column_def(this) 3396 3397 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> 
t.Optional[exp.Expression]: 3398 if not self._match_text_seq("AT", "TIME", "ZONE"): 3399 return this 3400 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3401 3402 def _parse_column(self) -> t.Optional[exp.Expression]: 3403 this = self._parse_field() 3404 if isinstance(this, exp.Identifier): 3405 this = self.expression(exp.Column, this=this) 3406 elif not this: 3407 return self._parse_bracket(this) 3408 return self._parse_column_ops(this) 3409 3410 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3411 this = self._parse_bracket(this) 3412 3413 while self._match_set(self.COLUMN_OPERATORS): 3414 op_token = self._prev.token_type 3415 op = self.COLUMN_OPERATORS.get(op_token) 3416 3417 if op_token == TokenType.DCOLON: 3418 field = self._parse_types() 3419 if not field: 3420 self.raise_error("Expected type") 3421 elif op and self._curr: 3422 self._advance() 3423 value = self._prev.text 3424 field = ( 3425 exp.Literal.number(value) 3426 if self._prev.token_type == TokenType.NUMBER 3427 else exp.Literal.string(value) 3428 ) 3429 else: 3430 field = self._parse_field(anonymous_func=True, any_token=True) 3431 3432 if isinstance(field, exp.Func): 3433 # bigquery allows function calls like x.y.count(...) 3434 # SAFE.SUBSTR(...) 3435 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3436 this = self._replace_columns_with_dots(this) 3437 3438 if op: 3439 this = op(self, this, field) 3440 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3441 this = self.expression( 3442 exp.Column, 3443 this=field, 3444 table=this.this, 3445 db=this.args.get("table"), 3446 catalog=this.args.get("db"), 3447 ) 3448 else: 3449 this = self.expression(exp.Dot, this=this, expression=field) 3450 this = self._parse_bracket(this) 3451 return this 3452 3453 def _parse_primary(self) -> t.Optional[exp.Expression]: 3454 if self._match_set(self.PRIMARY_PARSERS): 3455 token_type = self._prev.token_type 3456 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3457 3458 if token_type == TokenType.STRING: 3459 expressions = [primary] 3460 while self._match(TokenType.STRING): 3461 expressions.append(exp.Literal.string(self._prev.text)) 3462 3463 if len(expressions) > 1: 3464 return self.expression(exp.Concat, expressions=expressions) 3465 3466 return primary 3467 3468 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3469 return exp.Literal.number(f"0.{self._prev.text}") 3470 3471 if self._match(TokenType.L_PAREN): 3472 comments = self._prev_comments 3473 query = self._parse_select() 3474 3475 if query: 3476 expressions = [query] 3477 else: 3478 expressions = self._parse_expressions() 3479 3480 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3481 3482 if isinstance(this, exp.Subqueryable): 3483 this = self._parse_set_operations( 3484 self._parse_subquery(this=this, parse_alias=False) 3485 ) 3486 elif len(expressions) > 1: 3487 this = self.expression(exp.Tuple, expressions=expressions) 3488 else: 3489 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3490 3491 if this: 3492 this.add_comments(comments) 3493 3494 self._match_r_paren(expression=this) 3495 return this 3496 3497 return None 3498 3499 def _parse_field( 3500 self, 3501 any_token: bool = False, 3502 tokens: t.Optional[t.Collection[TokenType]] = None, 3503 anonymous_func: bool = False, 3504 ) -> t.Optional[exp.Expression]: 3505 return ( 3506 self._parse_primary() 3507 or 
self._parse_function(anonymous=anonymous_func) 3508 or self._parse_id_var(any_token=any_token, tokens=tokens) 3509 ) 3510 3511 def _parse_function( 3512 self, 3513 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3514 anonymous: bool = False, 3515 optional_parens: bool = True, 3516 ) -> t.Optional[exp.Expression]: 3517 if not self._curr: 3518 return None 3519 3520 token_type = self._curr.token_type 3521 this = self._curr.text 3522 upper = this.upper() 3523 3524 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3525 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3526 self._advance() 3527 return parser(self) 3528 3529 if not self._next or self._next.token_type != TokenType.L_PAREN: 3530 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3531 self._advance() 3532 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3533 3534 return None 3535 3536 if token_type not in self.FUNC_TOKENS: 3537 return None 3538 3539 self._advance(2) 3540 3541 parser = self.FUNCTION_PARSERS.get(upper) 3542 if parser and not anonymous: 3543 this = parser(self) 3544 else: 3545 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3546 3547 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3548 this = self.expression(subquery_predicate, this=self._parse_select()) 3549 self._match_r_paren() 3550 return this 3551 3552 if functions is None: 3553 functions = self.FUNCTIONS 3554 3555 function = functions.get(upper) 3556 3557 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3558 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3559 3560 if function and not anonymous: 3561 func = self.validate_expression(function(args), args) 3562 if not self.NORMALIZE_FUNCTIONS: 3563 func.meta["name"] = this 3564 this = func 3565 else: 3566 this = self.expression(exp.Anonymous, this=this, expressions=args) 3567 3568 self._match_r_paren(this) 3569 return self._parse_window(this) 3570 3571 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3572 return self._parse_column_def(self._parse_id_var()) 3573 3574 def _parse_user_defined_function( 3575 self, kind: t.Optional[TokenType] = None 3576 ) -> t.Optional[exp.Expression]: 3577 this = self._parse_id_var() 3578 3579 while self._match(TokenType.DOT): 3580 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3581 3582 if not self._match(TokenType.L_PAREN): 3583 return this 3584 3585 expressions = self._parse_csv(self._parse_function_parameter) 3586 self._match_r_paren() 3587 return self.expression( 3588 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3589 ) 3590 3591 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3592 literal = self._parse_primary() 3593 if literal: 3594 return self.expression(exp.Introducer, this=token.text, expression=literal) 3595 3596 return self.expression(exp.Identifier, this=token.text) 3597 3598 def _parse_session_parameter(self) -> exp.SessionParameter: 3599 kind = None 3600 this = self._parse_id_var() or self._parse_primary() 3601 3602 if this and self._match(TokenType.DOT): 3603 kind = this.name 3604 this = self._parse_var() or self._parse_primary() 3605 3606 return self.expression(exp.SessionParameter, this=this, kind=kind) 3607 3608 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3609 index = self._index 3610 3611 if self._match(TokenType.L_PAREN): 3612 expressions = t.cast( 3613 t.List[t.Optional[exp.Expression]], 
self._parse_csv(self._parse_id_var) 3614 ) 3615 3616 if not self._match(TokenType.R_PAREN): 3617 self._retreat(index) 3618 else: 3619 expressions = [self._parse_id_var()] 3620 3621 if self._match_set(self.LAMBDAS): 3622 return self.LAMBDAS[self._prev.token_type](self, expressions) 3623 3624 self._retreat(index) 3625 3626 this: t.Optional[exp.Expression] 3627 3628 if self._match(TokenType.DISTINCT): 3629 this = self.expression( 3630 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3631 ) 3632 else: 3633 this = self._parse_select_or_expression(alias=alias) 3634 3635 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3636 3637 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3638 index = self._index 3639 3640 if not self.errors: 3641 try: 3642 if self._parse_select(nested=True): 3643 return this 3644 except ParseError: 3645 pass 3646 finally: 3647 self.errors.clear() 3648 self._retreat(index) 3649 3650 if not self._match(TokenType.L_PAREN): 3651 return this 3652 3653 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3654 3655 self._match_r_paren() 3656 return self.expression(exp.Schema, this=this, expressions=args) 3657 3658 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3659 return self._parse_column_def(self._parse_field(any_token=True)) 3660 3661 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3662 # column defs are not really columns, they're identifiers 3663 if isinstance(this, exp.Column): 3664 this = this.this 3665 3666 kind = self._parse_types(schema=True) 3667 3668 if self._match_text_seq("FOR", "ORDINALITY"): 3669 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3670 3671 constraints: t.List[exp.Expression] = [] 3672 3673 if not kind and self._match(TokenType.ALIAS): 3674 constraints.append( 3675 self.expression( 3676 exp.ComputedColumnConstraint, 3677 this=self._parse_conjunction(), 3678 persisted=self._match_text_seq("PERSISTED"), 3679 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3680 ) 3681 ) 3682 3683 while True: 3684 constraint = self._parse_column_constraint() 3685 if not constraint: 3686 break 3687 constraints.append(constraint) 3688 3689 if not kind and not constraints: 3690 return this 3691 3692 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3693 3694 def _parse_auto_increment( 3695 self, 3696 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3697 start = None 3698 increment = None 3699 3700 if self._match(TokenType.L_PAREN, advance=False): 3701 args = self._parse_wrapped_csv(self._parse_bitwise) 3702 start = seq_get(args, 0) 3703 increment = seq_get(args, 1) 3704 elif self._match_text_seq("START"): 3705 start = self._parse_bitwise() 3706 self._match_text_seq("INCREMENT") 3707 increment = self._parse_bitwise() 3708 3709 if start and increment: 3710 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3711 3712 return exp.AutoIncrementColumnConstraint() 3713 3714 def _parse_compress(self) -> exp.CompressColumnConstraint: 3715 if self._match(TokenType.L_PAREN, advance=False): 3716 return self.expression( 3717 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3718 ) 3719 3720 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3721 3722 def _parse_generated_as_identity(self) -> 
exp.GeneratedAsIdentityColumnConstraint: 3723 if self._match_text_seq("BY", "DEFAULT"): 3724 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3725 this = self.expression( 3726 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3727 ) 3728 else: 3729 self._match_text_seq("ALWAYS") 3730 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3731 3732 self._match(TokenType.ALIAS) 3733 identity = self._match_text_seq("IDENTITY") 3734 3735 if self._match(TokenType.L_PAREN): 3736 if self._match(TokenType.START_WITH): 3737 this.set("start", self._parse_bitwise()) 3738 if self._match_text_seq("INCREMENT", "BY"): 3739 this.set("increment", self._parse_bitwise()) 3740 if self._match_text_seq("MINVALUE"): 3741 this.set("minvalue", self._parse_bitwise()) 3742 if self._match_text_seq("MAXVALUE"): 3743 this.set("maxvalue", self._parse_bitwise()) 3744 3745 if self._match_text_seq("CYCLE"): 3746 this.set("cycle", True) 3747 elif self._match_text_seq("NO", "CYCLE"): 3748 this.set("cycle", False) 3749 3750 if not identity: 3751 this.set("expression", self._parse_bitwise()) 3752 3753 self._match_r_paren() 3754 3755 return this 3756 3757 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3758 self._match_text_seq("LENGTH") 3759 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3760 3761 def _parse_not_constraint( 3762 self, 3763 ) -> t.Optional[exp.Expression]: 3764 if self._match_text_seq("NULL"): 3765 return self.expression(exp.NotNullColumnConstraint) 3766 if self._match_text_seq("CASESPECIFIC"): 3767 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3768 if self._match_text_seq("FOR", "REPLICATION"): 3769 return self.expression(exp.NotForReplicationColumnConstraint) 3770 return None 3771 3772 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3773 if self._match(TokenType.CONSTRAINT): 3774 this = self._parse_id_var() 3775 else: 3776 this = None 3777 3778 if self._match_texts(self.CONSTRAINT_PARSERS): 3779 return self.expression( 3780 exp.ColumnConstraint, 3781 this=this, 3782 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3783 ) 3784 3785 return this 3786 3787 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3788 if not self._match(TokenType.CONSTRAINT): 3789 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3790 3791 this = self._parse_id_var() 3792 expressions = [] 3793 3794 while True: 3795 constraint = self._parse_unnamed_constraint() or self._parse_function() 3796 if not constraint: 3797 break 3798 expressions.append(constraint) 3799 3800 return self.expression(exp.Constraint, this=this, expressions=expressions) 3801 3802 def _parse_unnamed_constraint( 3803 self, constraints: t.Optional[t.Collection[str]] = None 3804 ) -> t.Optional[exp.Expression]: 3805 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3806 return None 3807 3808 constraint = self._prev.text.upper() 3809 if constraint not in self.CONSTRAINT_PARSERS: 3810 self.raise_error(f"No parser found for schema constraint {constraint}.") 3811 3812 return self.CONSTRAINT_PARSERS[constraint](self) 3813 3814 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3815 self._match_text_seq("KEY") 3816 return self.expression( 3817 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3818 ) 3819 3820 def _parse_key_constraint_options(self) -> t.List[str]: 3821 options = [] 3822 while True: 3823 if not self._curr: 3824 break 
3825 3826 if self._match(TokenType.ON): 3827 action = None 3828 on = self._advance_any() and self._prev.text 3829 3830 if self._match_text_seq("NO", "ACTION"): 3831 action = "NO ACTION" 3832 elif self._match_text_seq("CASCADE"): 3833 action = "CASCADE" 3834 elif self._match_pair(TokenType.SET, TokenType.NULL): 3835 action = "SET NULL" 3836 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3837 action = "SET DEFAULT" 3838 else: 3839 self.raise_error("Invalid key constraint") 3840 3841 options.append(f"ON {on} {action}") 3842 elif self._match_text_seq("NOT", "ENFORCED"): 3843 options.append("NOT ENFORCED") 3844 elif self._match_text_seq("DEFERRABLE"): 3845 options.append("DEFERRABLE") 3846 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3847 options.append("INITIALLY DEFERRED") 3848 elif self._match_text_seq("NORELY"): 3849 options.append("NORELY") 3850 elif self._match_text_seq("MATCH", "FULL"): 3851 options.append("MATCH FULL") 3852 else: 3853 break 3854 3855 return options 3856 3857 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3858 if match and not self._match(TokenType.REFERENCES): 3859 return None 3860 3861 expressions = None 3862 this = self._parse_table(schema=True) 3863 options = self._parse_key_constraint_options() 3864 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3865 3866 def _parse_foreign_key(self) -> exp.ForeignKey: 3867 expressions = self._parse_wrapped_id_vars() 3868 reference = self._parse_references() 3869 options = {} 3870 3871 while self._match(TokenType.ON): 3872 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3873 self.raise_error("Expected DELETE or UPDATE") 3874 3875 kind = self._prev.text.lower() 3876 3877 if self._match_text_seq("NO", "ACTION"): 3878 action = "NO ACTION" 3879 elif self._match(TokenType.SET): 3880 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3881 action = "SET " + self._prev.text.upper() 3882 else: 3883 self._advance() 3884 action = self._prev.text.upper() 3885 3886 options[kind] = action 3887 3888 return self.expression( 3889 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3890 ) 3891 3892 def _parse_primary_key( 3893 self, wrapped_optional: bool = False, in_props: bool = False 3894 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3895 desc = ( 3896 self._match_set((TokenType.ASC, TokenType.DESC)) 3897 and self._prev.token_type == TokenType.DESC 3898 ) 3899 3900 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3901 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3902 3903 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3904 options = self._parse_key_constraint_options() 3905 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3906 3907 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3908 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3909 return this 3910 3911 bracket_kind = self._prev.token_type 3912 3913 if self._match(TokenType.COLON): 3914 expressions: t.List[exp.Expression] = [ 3915 self.expression(exp.Slice, expression=self._parse_conjunction()) 3916 ] 3917 else: 3918 expressions = self._parse_csv( 3919 lambda: self._parse_slice( 3920 self._parse_alias(self._parse_conjunction(), explicit=True) 3921 ) 3922 ) 3923 3924 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3925 if bracket_kind == TokenType.L_BRACE: 
3926 this = self.expression(exp.Struct, expressions=expressions) 3927 elif not this or this.name.upper() == "ARRAY": 3928 this = self.expression(exp.Array, expressions=expressions) 3929 else: 3930 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3931 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3932 3933 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3934 self.raise_error("Expected ]") 3935 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3936 self.raise_error("Expected }") 3937 3938 self._add_comments(this) 3939 return self._parse_bracket(this) 3940 3941 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3942 if self._match(TokenType.COLON): 3943 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3944 return this 3945 3946 def _parse_case(self) -> t.Optional[exp.Expression]: 3947 ifs = [] 3948 default = None 3949 3950 comments = self._prev_comments 3951 expression = self._parse_conjunction() 3952 3953 while self._match(TokenType.WHEN): 3954 this = self._parse_conjunction() 3955 self._match(TokenType.THEN) 3956 then = self._parse_conjunction() 3957 ifs.append(self.expression(exp.If, this=this, true=then)) 3958 3959 if self._match(TokenType.ELSE): 3960 default = self._parse_conjunction() 3961 3962 if not self._match(TokenType.END): 3963 self.raise_error("Expected END after CASE", self._prev) 3964 3965 return self._parse_window( 3966 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 3967 ) 3968 3969 def _parse_if(self) -> t.Optional[exp.Expression]: 3970 if self._match(TokenType.L_PAREN): 3971 args = self._parse_csv(self._parse_conjunction) 3972 this = self.validate_expression(exp.If.from_arg_list(args), args) 3973 self._match_r_paren() 3974 else: 3975 index = self._index - 1 3976 condition = self._parse_conjunction() 3977 3978 if not condition: 3979 self._retreat(index) 3980 return None 3981 3982 self._match(TokenType.THEN) 3983 true = self._parse_conjunction() 3984 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3985 self._match(TokenType.END) 3986 this = self.expression(exp.If, this=condition, true=true, false=false) 3987 3988 return self._parse_window(this) 3989 3990 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 3991 if not self._match_text_seq("VALUE", "FOR"): 3992 self._retreat(self._index - 1) 3993 return None 3994 3995 return self.expression( 3996 exp.NextValueFor, 3997 this=self._parse_column(), 3998 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 3999 ) 4000 4001 def _parse_extract(self) -> exp.Extract: 4002 this = self._parse_function() or self._parse_var() or self._parse_type() 4003 4004 if self._match(TokenType.FROM): 4005 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4006 4007 if not self._match(TokenType.COMMA): 4008 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4009 4010 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4011 4012 def _parse_any_value(self) -> exp.AnyValue: 4013 this = self._parse_lambda() 4014 is_max = None 4015 having = None 4016 4017 if self._match(TokenType.HAVING): 4018 self._match_texts(("MAX", "MIN")) 4019 is_max = self._prev.text == "MAX" 4020 having = self._parse_column() 4021 4022 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4023 4024 def 
_parse_cast(self, strict: bool) -> exp.Expression: 4025 this = self._parse_conjunction() 4026 4027 if not self._match(TokenType.ALIAS): 4028 if self._match(TokenType.COMMA): 4029 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4030 4031 self.raise_error("Expected AS after CAST") 4032 4033 fmt = None 4034 to = self._parse_types() 4035 4036 if not to: 4037 self.raise_error("Expected TYPE after CAST") 4038 elif isinstance(to, exp.Identifier): 4039 to = exp.DataType.build(to.name, udt=True) 4040 elif to.this == exp.DataType.Type.CHAR: 4041 if self._match(TokenType.CHARACTER_SET): 4042 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4043 elif self._match(TokenType.FORMAT): 4044 fmt_string = self._parse_string() 4045 fmt = self._parse_at_time_zone(fmt_string) 4046 4047 if to.this in exp.DataType.TEMPORAL_TYPES: 4048 this = self.expression( 4049 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4050 this=this, 4051 format=exp.Literal.string( 4052 format_time( 4053 fmt_string.this if fmt_string else "", 4054 self.FORMAT_MAPPING or self.TIME_MAPPING, 4055 self.FORMAT_TRIE or self.TIME_TRIE, 4056 ) 4057 ), 4058 ) 4059 4060 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4061 this.set("zone", fmt.args["zone"]) 4062 4063 return this 4064 4065 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 4066 4067 def _parse_concat(self) -> t.Optional[exp.Expression]: 4068 args = self._parse_csv(self._parse_conjunction) 4069 if self.CONCAT_NULL_OUTPUTS_STRING: 4070 args = [ 4071 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 4072 for arg in args 4073 if arg 4074 ] 4075 4076 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4077 # we find such a call we replace it with its argument. 4078 if len(args) == 1: 4079 return args[0] 4080 4081 return self.expression( 4082 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4083 ) 4084 4085 def _parse_string_agg(self) -> exp.Expression: 4086 if self._match(TokenType.DISTINCT): 4087 args: t.List[t.Optional[exp.Expression]] = [ 4088 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4089 ] 4090 if self._match(TokenType.COMMA): 4091 args.extend(self._parse_csv(self._parse_conjunction)) 4092 else: 4093 args = self._parse_csv(self._parse_conjunction) # type: ignore 4094 4095 index = self._index 4096 if not self._match(TokenType.R_PAREN) and args: 4097 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4098 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4099 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4100 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4101 4102 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4103 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4104 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
4105 if not self._match_text_seq("WITHIN", "GROUP"): 4106 self._retreat(index) 4107 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4108 4109 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4110 order = self._parse_order(this=seq_get(args, 0)) 4111 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4112 4113 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4114 this = self._parse_bitwise() 4115 4116 if self._match(TokenType.USING): 4117 to: t.Optional[exp.Expression] = self.expression( 4118 exp.CharacterSet, this=self._parse_var() 4119 ) 4120 elif self._match(TokenType.COMMA): 4121 to = self._parse_types() 4122 else: 4123 to = None 4124 4125 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4126 4127 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4128 """ 4129 There are generally two variants of the DECODE function: 4130 4131 - DECODE(bin, charset) 4132 - DECODE(expression, search, result [, search, result] ... [, default]) 4133 4134 The second variant will always be parsed into a CASE expression. Note that NULL 4135 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4136 instead of relying on pattern matching. 4137 """ 4138 args = self._parse_csv(self._parse_conjunction) 4139 4140 if len(args) < 3: 4141 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4142 4143 expression, *expressions = args 4144 if not expression: 4145 return None 4146 4147 ifs = [] 4148 for search, result in zip(expressions[::2], expressions[1::2]): 4149 if not search or not result: 4150 return None 4151 4152 if isinstance(search, exp.Literal): 4153 ifs.append( 4154 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4155 ) 4156 elif isinstance(search, exp.Null): 4157 ifs.append( 4158 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4159 ) 4160 else: 4161 cond = exp.or_( 4162 exp.EQ(this=expression.copy(), expression=search), 4163 exp.and_( 4164 exp.Is(this=expression.copy(), expression=exp.Null()), 4165 exp.Is(this=search.copy(), expression=exp.Null()), 4166 copy=False, 4167 ), 4168 copy=False, 4169 ) 4170 ifs.append(exp.If(this=cond, true=result)) 4171 4172 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4173 4174 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4175 self._match_text_seq("KEY") 4176 key = self._parse_column() 4177 self._match_set((TokenType.COLON, TokenType.COMMA)) 4178 self._match_text_seq("VALUE") 4179 value = self._parse_bitwise() 4180 4181 if not key and not value: 4182 return None 4183 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4184 4185 def _parse_json_object(self) -> exp.JSONObject: 4186 star = self._parse_star() 4187 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 4188 4189 null_handling = None 4190 if self._match_text_seq("NULL", "ON", "NULL"): 4191 null_handling = "NULL ON NULL" 4192 elif self._match_text_seq("ABSENT", "ON", "NULL"): 4193 null_handling = "ABSENT ON NULL" 4194 4195 unique_keys = None 4196 if self._match_text_seq("WITH", "UNIQUE"): 4197 unique_keys = True 4198 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4199 unique_keys = False 4200 4201 self._match_text_seq("KEYS") 4202 4203 return_type = self._match_text_seq("RETURNING") and self._parse_type() 4204 format_json = 
self._match_text_seq("FORMAT", "JSON") 4205 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4206 4207 return self.expression( 4208 exp.JSONObject, 4209 expressions=expressions, 4210 null_handling=null_handling, 4211 unique_keys=unique_keys, 4212 return_type=return_type, 4213 format_json=format_json, 4214 encoding=encoding, 4215 ) 4216 4217 def _parse_logarithm(self) -> exp.Func: 4218 # Default argument order is base, expression 4219 args = self._parse_csv(self._parse_range) 4220 4221 if len(args) > 1: 4222 if not self.LOG_BASE_FIRST: 4223 args.reverse() 4224 return exp.Log.from_arg_list(args) 4225 4226 return self.expression( 4227 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4228 ) 4229 4230 def _parse_match_against(self) -> exp.MatchAgainst: 4231 expressions = self._parse_csv(self._parse_column) 4232 4233 self._match_text_seq(")", "AGAINST", "(") 4234 4235 this = self._parse_string() 4236 4237 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4238 modifier = "IN NATURAL LANGUAGE MODE" 4239 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4240 modifier = f"{modifier} WITH QUERY EXPANSION" 4241 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4242 modifier = "IN BOOLEAN MODE" 4243 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4244 modifier = "WITH QUERY EXPANSION" 4245 else: 4246 modifier = None 4247 4248 return self.expression( 4249 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4250 ) 4251 4252 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4253 def _parse_open_json(self) -> exp.OpenJSON: 4254 this = self._parse_bitwise() 4255 path = self._match(TokenType.COMMA) and self._parse_string() 4256 4257 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4258 this = self._parse_field(any_token=True) 4259 kind = self._parse_types() 4260 path = self._parse_string() 4261 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4262 4263 return self.expression( 4264 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4265 ) 4266 4267 expressions = None 4268 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4269 self._match_l_paren() 4270 expressions = self._parse_csv(_parse_open_json_column_def) 4271 4272 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4273 4274 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4275 args = self._parse_csv(self._parse_bitwise) 4276 4277 if self._match(TokenType.IN): 4278 return self.expression( 4279 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4280 ) 4281 4282 if haystack_first: 4283 haystack = seq_get(args, 0) 4284 needle = seq_get(args, 1) 4285 else: 4286 needle = seq_get(args, 0) 4287 haystack = seq_get(args, 1) 4288 4289 return self.expression( 4290 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4291 ) 4292 4293 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4294 args = self._parse_csv(self._parse_table) 4295 return exp.JoinHint(this=func_name.upper(), expressions=args) 4296 4297 def _parse_substring(self) -> exp.Substring: 4298 # Postgres supports the form: substring(string [from int] [for int]) 4299 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4300 4301 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4302 4303 if self._match(TokenType.FROM): 4304 args.append(self._parse_bitwise()) 4305 
if self._match(TokenType.FOR): 4306 args.append(self._parse_bitwise()) 4307 4308 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4309 4310 def _parse_trim(self) -> exp.Trim: 4311 # https://www.w3resource.com/sql/character-functions/trim.php 4312 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4313 4314 position = None 4315 collation = None 4316 4317 if self._match_texts(self.TRIM_TYPES): 4318 position = self._prev.text.upper() 4319 4320 expression = self._parse_bitwise() 4321 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4322 this = self._parse_bitwise() 4323 else: 4324 this = expression 4325 expression = None 4326 4327 if self._match(TokenType.COLLATE): 4328 collation = self._parse_bitwise() 4329 4330 return self.expression( 4331 exp.Trim, this=this, position=position, expression=expression, collation=collation 4332 ) 4333 4334 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4335 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4336 4337 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4338 return self._parse_window(self._parse_id_var(), alias=True) 4339 4340 def _parse_respect_or_ignore_nulls( 4341 self, this: t.Optional[exp.Expression] 4342 ) -> t.Optional[exp.Expression]: 4343 if self._match_text_seq("IGNORE", "NULLS"): 4344 return self.expression(exp.IgnoreNulls, this=this) 4345 if self._match_text_seq("RESPECT", "NULLS"): 4346 return self.expression(exp.RespectNulls, this=this) 4347 return this 4348 4349 def _parse_window( 4350 self, this: t.Optional[exp.Expression], alias: bool = False 4351 ) -> t.Optional[exp.Expression]: 4352 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4353 self._match(TokenType.WHERE) 4354 this = self.expression( 4355 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4356 ) 4357 self._match_r_paren() 4358 4359 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4360 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4361 if self._match_text_seq("WITHIN", "GROUP"): 4362 order = self._parse_wrapped(self._parse_order) 4363 this = self.expression(exp.WithinGroup, this=this, expression=order) 4364 4365 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4366 # Some dialects choose to implement and some do not. 4367 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4368 4369 # There is some code above in _parse_lambda that handles 4370 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4371 4372 # The below changes handle 4373 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4374 4375 # Oracle allows both formats 4376 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4377 # and Snowflake chose to do the same for familiarity 4378 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4379 this = self._parse_respect_or_ignore_nulls(this) 4380 4381 # bigquery select from window x AS (partition by ...) 
4382 if alias: 4383 over = None 4384 self._match(TokenType.ALIAS) 4385 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4386 return this 4387 else: 4388 over = self._prev.text.upper() 4389 4390 if not self._match(TokenType.L_PAREN): 4391 return self.expression( 4392 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4393 ) 4394 4395 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4396 4397 first = self._match(TokenType.FIRST) 4398 if self._match_text_seq("LAST"): 4399 first = False 4400 4401 partition, order = self._parse_partition_and_order() 4402 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4403 4404 if kind: 4405 self._match(TokenType.BETWEEN) 4406 start = self._parse_window_spec() 4407 self._match(TokenType.AND) 4408 end = self._parse_window_spec() 4409 4410 spec = self.expression( 4411 exp.WindowSpec, 4412 kind=kind, 4413 start=start["value"], 4414 start_side=start["side"], 4415 end=end["value"], 4416 end_side=end["side"], 4417 ) 4418 else: 4419 spec = None 4420 4421 self._match_r_paren() 4422 4423 window = self.expression( 4424 exp.Window, 4425 this=this, 4426 partition_by=partition, 4427 order=order, 4428 spec=spec, 4429 alias=window_alias, 4430 over=over, 4431 first=first, 4432 ) 4433 4434 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4435 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4436 return self._parse_window(window, alias=alias) 4437 4438 return window 4439 4440 def _parse_partition_and_order( 4441 self, 4442 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4443 return self._parse_partition_by(), self._parse_order() 4444 4445 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4446 self._match(TokenType.BETWEEN) 4447 4448 return { 4449 "value": ( 4450 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4451 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4452 or self._parse_bitwise() 4453 ), 4454 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4455 } 4456 4457 def _parse_alias( 4458 self, this: t.Optional[exp.Expression], explicit: bool = False 4459 ) -> t.Optional[exp.Expression]: 4460 any_token = self._match(TokenType.ALIAS) 4461 4462 if explicit and not any_token: 4463 return this 4464 4465 if self._match(TokenType.L_PAREN): 4466 aliases = self.expression( 4467 exp.Aliases, 4468 this=this, 4469 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4470 ) 4471 self._match_r_paren(aliases) 4472 return aliases 4473 4474 alias = self._parse_id_var(any_token) 4475 4476 if alias: 4477 return self.expression(exp.Alias, this=this, alias=alias) 4478 4479 return this 4480 4481 def _parse_id_var( 4482 self, 4483 any_token: bool = True, 4484 tokens: t.Optional[t.Collection[TokenType]] = None, 4485 ) -> t.Optional[exp.Expression]: 4486 identifier = self._parse_identifier() 4487 4488 if identifier: 4489 return identifier 4490 4491 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4492 quoted = self._prev.token_type == TokenType.STRING 4493 return exp.Identifier(this=self._prev.text, quoted=quoted) 4494 4495 return None 4496 4497 def _parse_string(self) -> t.Optional[exp.Expression]: 4498 if self._match(TokenType.STRING): 4499 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4500 return self._parse_placeholder() 4501 4502 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4503 return 
exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4504 4505 def _parse_number(self) -> t.Optional[exp.Expression]: 4506 if self._match(TokenType.NUMBER): 4507 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4508 return self._parse_placeholder() 4509 4510 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4511 if self._match(TokenType.IDENTIFIER): 4512 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4513 return self._parse_placeholder() 4514 4515 def _parse_var( 4516 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4517 ) -> t.Optional[exp.Expression]: 4518 if ( 4519 (any_token and self._advance_any()) 4520 or self._match(TokenType.VAR) 4521 or (self._match_set(tokens) if tokens else False) 4522 ): 4523 return self.expression(exp.Var, this=self._prev.text) 4524 return self._parse_placeholder() 4525 4526 def _advance_any(self) -> t.Optional[Token]: 4527 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4528 self._advance() 4529 return self._prev 4530 return None 4531 4532 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4533 return self._parse_var() or self._parse_string() 4534 4535 def _parse_null(self) -> t.Optional[exp.Expression]: 4536 if self._match(TokenType.NULL): 4537 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4538 return self._parse_placeholder() 4539 4540 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4541 if self._match(TokenType.TRUE): 4542 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4543 if self._match(TokenType.FALSE): 4544 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4545 return self._parse_placeholder() 4546 4547 def _parse_star(self) -> t.Optional[exp.Expression]: 4548 if self._match(TokenType.STAR): 4549 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4550 return self._parse_placeholder() 4551 4552 def _parse_parameter(self) -> exp.Parameter: 4553 wrapped = self._match(TokenType.L_BRACE) 4554 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4555 self._match(TokenType.R_BRACE) 4556 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4557 4558 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4559 if self._match_set(self.PLACEHOLDER_PARSERS): 4560 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4561 if placeholder: 4562 return placeholder 4563 self._advance(-1) 4564 return None 4565 4566 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4567 if not self._match(TokenType.EXCEPT): 4568 return None 4569 if self._match(TokenType.L_PAREN, advance=False): 4570 return self._parse_wrapped_csv(self._parse_column) 4571 return self._parse_csv(self._parse_column) 4572 4573 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4574 if not self._match(TokenType.REPLACE): 4575 return None 4576 if self._match(TokenType.L_PAREN, advance=False): 4577 return self._parse_wrapped_csv(self._parse_expression) 4578 return self._parse_expressions() 4579 4580 def _parse_csv( 4581 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4582 ) -> t.List[exp.Expression]: 4583 parse_result = parse_method() 4584 items = [parse_result] if parse_result is not None else [] 4585 4586 while self._match(sep): 4587 self._add_comments(parse_result) 4588 parse_result = parse_method() 4589 if parse_result is not None: 4590 items.append(parse_result) 4591 4592 return items 4593 4594 def 
_parse_tokens( 4595 self, parse_method: t.Callable, expressions: t.Dict 4596 ) -> t.Optional[exp.Expression]: 4597 this = parse_method() 4598 4599 while self._match_set(expressions): 4600 this = self.expression( 4601 expressions[self._prev.token_type], 4602 this=this, 4603 comments=self._prev_comments, 4604 expression=parse_method(), 4605 ) 4606 4607 return this 4608 4609 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4610 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4611 4612 def _parse_wrapped_csv( 4613 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4614 ) -> t.List[exp.Expression]: 4615 return self._parse_wrapped( 4616 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4617 ) 4618 4619 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4620 wrapped = self._match(TokenType.L_PAREN) 4621 if not wrapped and not optional: 4622 self.raise_error("Expecting (") 4623 parse_result = parse_method() 4624 if wrapped: 4625 self._match_r_paren() 4626 return parse_result 4627 4628 def _parse_expressions(self) -> t.List[exp.Expression]: 4629 return self._parse_csv(self._parse_expression) 4630 4631 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4632 return self._parse_select() or self._parse_set_operations( 4633 self._parse_expression() if alias else self._parse_conjunction() 4634 ) 4635 4636 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4637 return self._parse_query_modifiers( 4638 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4639 ) 4640 4641 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4642 this = None 4643 if self._match_texts(self.TRANSACTION_KIND): 4644 this = self._prev.text 4645 4646 self._match_texts({"TRANSACTION", "WORK"}) 4647 4648 modes = [] 4649 while True: 4650 mode = [] 4651 while self._match(TokenType.VAR): 4652 mode.append(self._prev.text) 4653 4654 if mode: 4655 modes.append(" ".join(mode)) 4656 if not self._match(TokenType.COMMA): 4657 break 4658 4659 return self.expression(exp.Transaction, this=this, modes=modes) 4660 4661 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4662 chain = None 4663 savepoint = None 4664 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4665 4666 self._match_texts({"TRANSACTION", "WORK"}) 4667 4668 if self._match_text_seq("TO"): 4669 self._match_text_seq("SAVEPOINT") 4670 savepoint = self._parse_id_var() 4671 4672 if self._match(TokenType.AND): 4673 chain = not self._match_text_seq("NO") 4674 self._match_text_seq("CHAIN") 4675 4676 if is_rollback: 4677 return self.expression(exp.Rollback, savepoint=savepoint) 4678 4679 return self.expression(exp.Commit, chain=chain) 4680 4681 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4682 if not self._match_text_seq("ADD"): 4683 return None 4684 4685 self._match(TokenType.COLUMN) 4686 exists_column = self._parse_exists(not_=True) 4687 expression = self._parse_field_def() 4688 4689 if expression: 4690 expression.set("exists", exists_column) 4691 4692 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4693 if self._match_texts(("FIRST", "AFTER")): 4694 position = self._prev.text 4695 column_position = self.expression( 4696 exp.ColumnPosition, this=self._parse_column(), position=position 4697 ) 4698 expression.set("position", column_position) 4699 4700 return expression 4701 
4702 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4703 drop = self._match(TokenType.DROP) and self._parse_drop() 4704 if drop and not isinstance(drop, exp.Command): 4705 drop.set("kind", drop.args.get("kind", "COLUMN")) 4706 return drop 4707 4708 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4709 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4710 return self.expression( 4711 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4712 ) 4713 4714 def _parse_add_constraint(self) -> exp.AddConstraint: 4715 this = None 4716 kind = self._prev.token_type 4717 4718 if kind == TokenType.CONSTRAINT: 4719 this = self._parse_id_var() 4720 4721 if self._match_text_seq("CHECK"): 4722 expression = self._parse_wrapped(self._parse_conjunction) 4723 enforced = self._match_text_seq("ENFORCED") 4724 4725 return self.expression( 4726 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4727 ) 4728 4729 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4730 expression = self._parse_foreign_key() 4731 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4732 expression = self._parse_primary_key() 4733 else: 4734 expression = None 4735 4736 return self.expression(exp.AddConstraint, this=this, expression=expression) 4737 4738 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4739 index = self._index - 1 4740 4741 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4742 return self._parse_csv(self._parse_add_constraint) 4743 4744 self._retreat(index) 4745 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4746 return self._parse_csv(self._parse_field_def) 4747 4748 return self._parse_csv(self._parse_add_column) 4749 4750 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4751 self._match(TokenType.COLUMN) 4752 column = self._parse_field(any_token=True) 4753 4754 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4755 return self.expression(exp.AlterColumn, this=column, drop=True) 4756 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4757 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4758 4759 self._match_text_seq("SET", "DATA") 4760 return self.expression( 4761 exp.AlterColumn, 4762 this=column, 4763 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4764 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4765 using=self._match(TokenType.USING) and self._parse_conjunction(), 4766 ) 4767 4768 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4769 index = self._index - 1 4770 4771 partition_exists = self._parse_exists() 4772 if self._match(TokenType.PARTITION, advance=False): 4773 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4774 4775 self._retreat(index) 4776 return self._parse_csv(self._parse_drop_column) 4777 4778 def _parse_alter_table_rename(self) -> exp.RenameTable: 4779 self._match_text_seq("TO") 4780 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4781 4782 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4783 start = self._prev 4784 4785 if not self._match(TokenType.TABLE): 4786 return self._parse_as_command(start) 4787 4788 exists = self._parse_exists() 4789 only = self._match_text_seq("ONLY") 4790 this = self._parse_table(schema=True) 4791 4792 if self._next: 4793 self._advance() 4794 4795 parser = 
self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4796 if parser: 4797 actions = ensure_list(parser(self)) 4798 4799 if not self._curr: 4800 return self.expression( 4801 exp.AlterTable, 4802 this=this, 4803 exists=exists, 4804 actions=actions, 4805 only=only, 4806 ) 4807 4808 return self._parse_as_command(start) 4809 4810 def _parse_merge(self) -> exp.Merge: 4811 self._match(TokenType.INTO) 4812 target = self._parse_table() 4813 4814 if target and self._match(TokenType.ALIAS, advance=False): 4815 target.set("alias", self._parse_table_alias()) 4816 4817 self._match(TokenType.USING) 4818 using = self._parse_table() 4819 4820 self._match(TokenType.ON) 4821 on = self._parse_conjunction() 4822 4823 whens = [] 4824 while self._match(TokenType.WHEN): 4825 matched = not self._match(TokenType.NOT) 4826 self._match_text_seq("MATCHED") 4827 source = ( 4828 False 4829 if self._match_text_seq("BY", "TARGET") 4830 else self._match_text_seq("BY", "SOURCE") 4831 ) 4832 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4833 4834 self._match(TokenType.THEN) 4835 4836 if self._match(TokenType.INSERT): 4837 _this = self._parse_star() 4838 if _this: 4839 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4840 else: 4841 then = self.expression( 4842 exp.Insert, 4843 this=self._parse_value(), 4844 expression=self._match(TokenType.VALUES) and self._parse_value(), 4845 ) 4846 elif self._match(TokenType.UPDATE): 4847 expressions = self._parse_star() 4848 if expressions: 4849 then = self.expression(exp.Update, expressions=expressions) 4850 else: 4851 then = self.expression( 4852 exp.Update, 4853 expressions=self._match(TokenType.SET) 4854 and self._parse_csv(self._parse_equality), 4855 ) 4856 elif self._match(TokenType.DELETE): 4857 then = self.expression(exp.Var, this=self._prev.text) 4858 else: 4859 then = None 4860 4861 whens.append( 4862 self.expression( 4863 exp.When, 4864 matched=matched, 4865 source=source, 4866 condition=condition, 4867 then=then, 4868 ) 4869 ) 4870 4871 return self.expression( 4872 exp.Merge, 4873 this=target, 4874 using=using, 4875 on=on, 4876 expressions=whens, 4877 ) 4878 4879 def _parse_show(self) -> t.Optional[exp.Expression]: 4880 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4881 if parser: 4882 return parser(self) 4883 return self._parse_as_command(self._prev) 4884 4885 def _parse_set_item_assignment( 4886 self, kind: t.Optional[str] = None 4887 ) -> t.Optional[exp.Expression]: 4888 index = self._index 4889 4890 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4891 return self._parse_set_transaction(global_=kind == "GLOBAL") 4892 4893 left = self._parse_primary() or self._parse_id_var() 4894 4895 if not self._match_texts(("=", "TO")): 4896 self._retreat(index) 4897 return None 4898 4899 right = self._parse_statement() or self._parse_id_var() 4900 this = self.expression(exp.EQ, this=left, expression=right) 4901 4902 return self.expression(exp.SetItem, this=this, kind=kind) 4903 4904 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4905 self._match_text_seq("TRANSACTION") 4906 characteristics = self._parse_csv( 4907 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4908 ) 4909 return self.expression( 4910 exp.SetItem, 4911 expressions=characteristics, 4912 kind="TRANSACTION", 4913 **{"global": global_}, # type: ignore 4914 ) 4915 4916 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4917 parser = 
self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4918 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4919 4920 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4921 index = self._index 4922 set_ = self.expression( 4923 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4924 ) 4925 4926 if self._curr: 4927 self._retreat(index) 4928 return self._parse_as_command(self._prev) 4929 4930 return set_ 4931 4932 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4933 for option in options: 4934 if self._match_text_seq(*option.split(" ")): 4935 return exp.var(option) 4936 return None 4937 4938 def _parse_as_command(self, start: Token) -> exp.Command: 4939 while self._curr: 4940 self._advance() 4941 text = self._find_sql(start, self._prev) 4942 size = len(start.text) 4943 return exp.Command(this=text[:size], expression=text[size:]) 4944 4945 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4946 settings = [] 4947 4948 self._match_l_paren() 4949 kind = self._parse_id_var() 4950 4951 if self._match(TokenType.L_PAREN): 4952 while True: 4953 key = self._parse_id_var() 4954 value = self._parse_primary() 4955 4956 if not key and value is None: 4957 break 4958 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4959 self._match(TokenType.R_PAREN) 4960 4961 self._match_r_paren() 4962 4963 return self.expression( 4964 exp.DictProperty, 4965 this=this, 4966 kind=kind.this if kind else None, 4967 settings=settings, 4968 ) 4969 4970 def _parse_dict_range(self, this: str) -> exp.DictRange: 4971 self._match_l_paren() 4972 has_min = self._match_text_seq("MIN") 4973 if has_min: 4974 min = self._parse_var() or self._parse_primary() 4975 self._match_text_seq("MAX") 4976 max = self._parse_var() or self._parse_primary() 4977 else: 4978 max = self._parse_var() or self._parse_primary() 4979 min = exp.Literal.number(0) 4980 self._match_r_paren() 4981 return self.expression(exp.DictRange, this=this, min=min, max=max) 4982 4983 def _parse_comprehension(self, this: exp.Expression) -> exp.Comprehension: 4984 expression = self._parse_column() 4985 self._match(TokenType.IN) 4986 iterator = self._parse_column() 4987 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 4988 return self.expression( 4989 exp.Comprehension, 4990 this=this, 4991 expression=expression, 4992 iterator=iterator, 4993 condition=condition, 4994 ) 4995 4996 def _find_parser( 4997 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4998 ) -> t.Optional[t.Callable]: 4999 if not self._curr: 5000 return None 5001 5002 index = self._index 5003 this = [] 5004 while True: 5005 # The current token might be multiple words 5006 curr = self._curr.text.upper() 5007 key = curr.split(" ") 5008 this.append(curr) 5009 5010 self._advance() 5011 result, trie = in_trie(trie, key) 5012 if result == TrieResult.FAILED: 5013 break 5014 5015 if result == TrieResult.EXISTS: 5016 subparser = parsers[" ".join(this)] 5017 return subparser 5018 5019 self._retreat(index) 5020 return None 5021 5022 def _match(self, token_type, advance=True, expression=None): 5023 if not self._curr: 5024 return None 5025 5026 if self._curr.token_type == token_type: 5027 if advance: 5028 self._advance() 5029 self._add_comments(expression) 5030 return True 5031 5032 return None 5033 5034 def _match_set(self, types, advance=True): 5035 if not self._curr: 5036 return None 5037 5038 if self._curr.token_type in 
types: 5039 if advance: 5040 self._advance() 5041 return True 5042 5043 return None 5044 5045 def _match_pair(self, token_type_a, token_type_b, advance=True): 5046 if not self._curr or not self._next: 5047 return None 5048 5049 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5050 if advance: 5051 self._advance(2) 5052 return True 5053 5054 return None 5055 5056 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5057 if not self._match(TokenType.L_PAREN, expression=expression): 5058 self.raise_error("Expecting (") 5059 5060 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5061 if not self._match(TokenType.R_PAREN, expression=expression): 5062 self.raise_error("Expecting )") 5063 5064 def _match_texts(self, texts, advance=True): 5065 if self._curr and self._curr.text.upper() in texts: 5066 if advance: 5067 self._advance() 5068 return True 5069 return False 5070 5071 def _match_text_seq(self, *texts, advance=True): 5072 index = self._index 5073 for text in texts: 5074 if self._curr and self._curr.text.upper() == text: 5075 self._advance() 5076 else: 5077 self._retreat(index) 5078 return False 5079 5080 if not advance: 5081 self._retreat(index) 5082 5083 return True 5084 5085 @t.overload 5086 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5087 ... 5088 5089 @t.overload 5090 def _replace_columns_with_dots( 5091 self, this: t.Optional[exp.Expression] 5092 ) -> t.Optional[exp.Expression]: 5093 ... 5094 5095 def _replace_columns_with_dots(self, this): 5096 if isinstance(this, exp.Dot): 5097 exp.replace_children(this, self._replace_columns_with_dots) 5098 elif isinstance(this, exp.Column): 5099 exp.replace_children(this, self._replace_columns_with_dots) 5100 table = this.args.get("table") 5101 this = ( 5102 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5103 ) 5104 5105 return this 5106 5107 def _replace_lambda( 5108 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5109 ) -> t.Optional[exp.Expression]: 5110 if not node: 5111 return node 5112 5113 for column in node.find_all(exp.Column): 5114 if column.parts[0].name in lambda_variables: 5115 dot_or_id = column.to_dot() if column.table else column.this 5116 parent = column.parent 5117 5118 while isinstance(parent, exp.Dot): 5119 if not isinstance(parent.parent, exp.Dot): 5120 parent.replace(dot_or_id) 5121 break 5122 parent = parent.parent 5123 else: 5124 if column is node: 5125 node = dot_or_id 5126 else: 5127 column.replace(dot_or_id) 5128 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
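A minimal usage sketch of these settings: construct a parser with a non-default error level and feed it tokens (the base Tokenizer is used here; dialect parsers substitute their own TOKENIZER_CLASS):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    # Collect up to max_errors problems and raise them together at the end,
    # instead of raising immediately at the first one (the default).
    parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=3)

    sql = "SELECT a FROM t"
    expressions = parser.parse(Tokenizer().tokenize(sql), sql=sql)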
902 def __init__( 903 self, 904 error_level: t.Optional[ErrorLevel] = None, 905 error_message_context: int = 100, 906 max_errors: int = 3, 907 ): 908 self.error_level = error_level or ErrorLevel.IMMEDIATE 909 self.error_message_context = error_message_context 910 self.max_errors = max_errors 911 self._tokenizer = self.TOKENIZER_CLASS() 912 self.reset()
924 def parse( 925 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 926 ) -> t.List[t.Optional[exp.Expression]]: 927 """ 928 Parses a list of tokens and returns a list of syntax trees, one tree 929 per parsed SQL statement. 930 931 Args: 932 raw_tokens: The list of tokens. 933 sql: The original SQL string, used to produce helpful debug messages. 934 935 Returns: 936 The list of the produced syntax trees. 937 """ 938 return self._parse( 939 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 940 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
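A short sketch of the tokenize-then-parse flow; each top-level statement yields one tree (the return type allows None entries for empty statements):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; SELECT b FROM u"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)

    assert len(trees) == 2
    print(trees[0].sql())  # SELECT a FROM t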
942 def parse_into( 943 self, 944 expression_types: exp.IntoType, 945 raw_tokens: t.List[Token], 946 sql: t.Optional[str] = None, 947 ) -> t.List[t.Optional[exp.Expression]]: 948 """ 949 Parses a list of tokens into a given Expression type. If a collection of Expression 950 types is given instead, this method will try to parse the token list into each one 951 of them, stopping at the first for which the parsing succeeds. 952 953 Args: 954 expression_types: The expression type(s) to try and parse the token list into. 955 raw_tokens: The list of tokens. 956 sql: The original SQL string, used to produce helpful debug messages. 957 958 Returns: 959 The target Expression. 960 """ 961 errors = [] 962 for expression_type in ensure_list(expression_types): 963 parser = self.EXPRESSION_PARSERS.get(expression_type) 964 if not parser: 965 raise TypeError(f"No parser registered for {expression_type}") 966 967 try: 968 return self._parse(parser, raw_tokens, sql) 969 except ParseError as e: 970 e.errors[0]["into_expression"] = expression_type 971 errors.append(e) 972 973 raise ParseError( 974 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 975 errors=merge_errors(errors), 976 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
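For instance, a sketch that parses tokens directly into a SELECT tree, assuming exp.Select is registered in EXPRESSION_PARSERS as it is in the base parser. Passing a tuple of types instead would try each in order and keep the first success:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)[0]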
1013 def check_errors(self) -> None: 1014 """Logs or raises any found errors, depending on the chosen error level setting.""" 1015 if self.error_level == ErrorLevel.WARN: 1016 for error in self.errors: 1017 logger.error(str(error)) 1018 elif self.error_level == ErrorLevel.RAISE and self.errors: 1019 raise ParseError( 1020 concat_messages(self.errors, self.max_errors), 1021 errors=merge_errors(self.errors), 1022 )
Logs or raises any found errors, depending on the chosen error level setting.
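A sketch of the WARN behavior, assuming the unbalanced parenthesis below is recorded as an "Expecting )" error; parse() invokes check_errors() once the tokens are consumed:

    import logging
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    logging.basicConfig()

    sql = "SELECT (1"
    # With WARN, the recorded errors are logged rather than raised.
    Parser(error_level=ErrorLevel.WARN).parse(Tokenizer().tokenize(sql), sql=sql)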
1024 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1025 """ 1026 Appends an error in the list of recorded errors or raises it, depending on the chosen 1027 error level setting. 1028 """ 1029 token = token or self._curr or self._prev or Token.string("") 1030 start = token.start 1031 end = token.end + 1 1032 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1033 highlight = self.sql[start:end] 1034 end_context = self.sql[end : end + self.error_message_context] 1035 1036 error = ParseError.new( 1037 f"{message}. Line {token.line}, Col: {token.col}.\n" 1038 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1039 description=message, 1040 line=token.line, 1041 col=token.col, 1042 start_context=start_context, 1043 highlight=highlight, 1044 end_context=end_context, 1045 ) 1046 1047 if self.error_level == ErrorLevel.IMMEDIATE: 1048 raise error 1049 1050 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
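Under the default IMMEDIATE level the error is raised on the spot, and the resulting ParseError carries the structured context that raise_error builds. A sketch:

    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT (1"
    try:
        Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    except ParseError as e:
        error = e.errors[0]
        # line/col point at the offending token; highlight is the token text.
        print(error["line"], error["col"], error["highlight"])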
1052 def expression( 1053 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1054 ) -> E: 1055 """ 1056 Creates a new, validated Expression. 1057 1058 Args: 1059 exp_class: The expression class to instantiate. 1060 comments: An optional list of comments to attach to the expression. 1061 kwargs: The arguments to set for the expression along with their respective values. 1062 1063 Returns: 1064 The target expression. 1065 """ 1066 instance = exp_class(**kwargs) 1067 instance.add_comments(comments) if comments else self._add_comments(instance) 1068 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
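This helper is typically called from _parse_* methods. A sketch of a hypothetical subclass method (the method name is illustrative, not part of the API):

    from sqlglot import exp
    from sqlglot.parser import Parser

    class MyParser(Parser):
        def _parse_negation(self):  # hypothetical helper
            inner = self._parse_conjunction()
            # Attaches any pending comments and validates that all mandatory
            # args of exp.Not are set before returning the node.
            return self.expression(exp.Not, this=inner)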
1075 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1076 """ 1077 Validates an Expression, making sure that all its mandatory arguments are set. 1078 1079 Args: 1080 expression: The expression to validate. 1081 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1082 1083 Returns: 1084 The validated expression. 1085 """ 1086 if self.error_level != ErrorLevel.IGNORE: 1087 for error_message in expression.error_messages(args): 1088 self.raise_error(error_message) 1089 1090 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
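A sketch of what validation catches, assuming exp.If requires its `true` branch: built from a single positional argument, the instance is missing a mandatory keyword, so an error message is recorded (and raised later by check_errors under RAISE):

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    parser = Parser(error_level=ErrorLevel.RAISE)
    args = [exp.Literal.number(1)]
    parser.validate_expression(exp.If.from_arg_list(args), args)

    parser.check_errors()  # raises ParseError: required keyword missing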