sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
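
# Illustrative example (an annotation on this listing, not part of sqlglot):
# parse_var_map pairs a flat [key1, value1, key2, value2, ...] argument list
# into an exp.VarMap node, which is how e.g. HiveQL's MAP(...) call is
# represented. A minimal sketch:
#
#     >>> from sqlglot import exp
#     >>> from sqlglot.parser import parse_var_map
#     >>> node = parse_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#     >>> type(node).__name__
#     'VarMap'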


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
    }
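
    # Illustrative note (annotation): this map lets the parser fold a trailing
    # UNSIGNED keyword into a single unsigned type token, so that MySQL's
    # "INT UNSIGNED" can resolve to TokenType.UINT.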

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }
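
    # Illustrative note (annotation): CONJUNCTION, EQUALITY, COMPARISON, BITWISE,
    # TERM and FACTOR form the binary-operator precedence ladder. Each _parse_*
    # level parses the next-tighter level and folds matching operators left to
    # right, so "a + b * c" groups as "a + (b * c)".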

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
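
    # Illustrative example (annotation): EXPRESSION_PARSERS is the dispatch
    # table behind parse_into() (defined below), which also backs the top-level
    # sqlglot.parse_one(..., into=...) API. A sketch, assuming that API:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("x > 1 AND y < 2", into=exp.Condition)  # -> an exp.And node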

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
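
    # Illustrative example (annotation): most of these entries come from the
    # module-level binary_range_parser helper, whose _parse_escape call also
    # consumes a trailing ESCAPE clause. A sketch, assuming the top-level API:
    #
    #     >>> node = sqlglot.parse_one("a LIKE '%x%' ESCAPE '!'")
    #     >>> type(node).__name__
    #     'Escape'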

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }
"INLINE": lambda self: self._parse_inline(), 719 "LIKE": lambda self: self._parse_create_like(), 720 "NOT": lambda self: self._parse_not_constraint(), 721 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 722 "ON": lambda self: ( 723 self._match(TokenType.UPDATE) 724 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 725 ) 726 or self.expression(exp.OnProperty, this=self._parse_id_var()), 727 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 728 "PRIMARY KEY": lambda self: self._parse_primary_key(), 729 "REFERENCES": lambda self: self._parse_references(match=False), 730 "TITLE": lambda self: self.expression( 731 exp.TitleColumnConstraint, this=self._parse_var_or_string() 732 ), 733 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 734 "UNIQUE": lambda self: self._parse_unique(), 735 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 736 "WITH": lambda self: self.expression( 737 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 738 ), 739 } 740 741 ALTER_PARSERS = { 742 "ADD": lambda self: self._parse_alter_table_add(), 743 "ALTER": lambda self: self._parse_alter_table_alter(), 744 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 745 "DROP": lambda self: self._parse_alter_table_drop(), 746 "RENAME": lambda self: self._parse_alter_table_rename(), 747 } 748 749 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 750 751 NO_PAREN_FUNCTION_PARSERS = { 752 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 753 "CASE": lambda self: self._parse_case(), 754 "IF": lambda self: self._parse_if(), 755 "NEXT": lambda self: self._parse_next_value_for(), 756 } 757 758 INVALID_FUNC_NAME_TOKENS = { 759 TokenType.IDENTIFIER, 760 TokenType.STRING, 761 } 762 763 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 764 765 FUNCTION_PARSERS = { 766 "ANY_VALUE": lambda self: self._parse_any_value(), 767 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 768 "CONCAT": lambda self: self._parse_concat(), 769 "CONCAT_WS": lambda self: self._parse_concat_ws(), 770 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 771 "DECODE": lambda self: self._parse_decode(), 772 "EXTRACT": lambda self: self._parse_extract(), 773 "JSON_OBJECT": lambda self: self._parse_json_object(), 774 "LOG": lambda self: self._parse_logarithm(), 775 "MATCH": lambda self: self._parse_match_against(), 776 "OPENJSON": lambda self: self._parse_open_json(), 777 "POSITION": lambda self: self._parse_position(), 778 "SAFE_CAST": lambda self: self._parse_cast(False), 779 "STRING_AGG": lambda self: self._parse_string_agg(), 780 "SUBSTRING": lambda self: self._parse_substring(), 781 "TRIM": lambda self: self._parse_trim(), 782 "TRY_CAST": lambda self: self._parse_cast(False), 783 "TRY_CONVERT": lambda self: self._parse_convert(False), 784 } 785 786 QUERY_MODIFIER_PARSERS = { 787 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 788 TokenType.WHERE: lambda self: ("where", self._parse_where()), 789 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 790 TokenType.HAVING: lambda self: ("having", self._parse_having()), 791 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 792 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 793 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
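
    # Illustrative usage (annotation; a sketch of what sqlglot.parse() does
    # internally):
    #
    #     >>> from sqlglot.parser import Parser
    #     >>> from sqlglot.tokens import Tokenizer
    #     >>> sql = "SELECT a FROM t; SELECT b FROM u"
    #     >>> trees = Parser().parse(Tokenizer().tokenize(sql), sql)
    #     >>> [type(e).__name__ for e in trees]
    #     ['Select', 'Select']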

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
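
    # Illustrative usage (annotation; continuing the sketch above): parsing a
    # fragment directly into a clause node via EXPRESSION_PARSERS:
    #
    #     >>> sql = "WHERE x = 1"
    #     >>> nodes = Parser().parse_into(exp.Where, Tokenizer().tokenize(sql), sql)
    #     >>> [type(e).__name__ for e in nodes]
    #     ['Where']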

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
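
    # Illustrative note (annotation): this covers ClickHouse MergeTree TTL
    # clauses such as "TTL d + INTERVAL 1 DAY DELETE WHERE x = 1", producing an
    # exp.MergeTreeTTL with one parsed action per comma-separated TTL expression.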

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
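
    # Illustrative example (annotation): _parse_exists consumes an optional
    # IF [NOT] EXISTS. A sketch, assuming the top-level sqlglot API:
    #
    #     >>> sqlglot.parse_one("DROP TABLE IF EXISTS t").args["exists"]
    #     True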

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
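
    # Illustrative example (annotation) of the CREATE flow above, assuming the
    # top-level sqlglot API:
    #
    #     >>> create = sqlglot.parse_one("CREATE TABLE t AS SELECT 1")
    #     >>> create.args["kind"], type(create.expression).__name__
    #     ('TABLE', 'Select')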

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(
                exp.Property,
                this=key,
                value=self._parse_column() or self._parse_var(any_token=True),
            )

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )
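
    # Illustrative note (annotation): FALLBACK, JOURNAL, CHECKSUM, FREESPACE,
    # MERGEBLOCKRATIO and DATABLOCKSIZE are Teradata CREATE TABLE options; each
    # parser consumes its keyword-specific tail and returns a dedicated
    # exp.*Property node.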

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )
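
    # Editor's note (illustrative, not part of the original source): _parse_insert is
    # normally reached through the public API. A minimal sketch, assuming sqlglot is
    # installed:
    #
    #     import sqlglot
    #     tree = sqlglot.parse_one("INSERT INTO t (a, b) VALUES (1, 2)")
    #     assert isinstance(tree, sqlglot.exp.Insert)
    #     print(tree.sql())  # INSERT INTO t (a, b) VALUES (1, 2)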

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
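
    # Editor's note (illustrative): _parse_on_conflict handles both the Postgres
    # ON CONFLICT and the MySQL ON DUPLICATE KEY forms. A minimal sketch:
    #
    #     import sqlglot
    #     tree = sqlglot.parse_one(
    #         "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING", read="postgres"
    #     )
    #     print(tree.args["conflict"])  # an exp.OnConflict node with nothing=True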

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )
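
    # Editor's note (illustrative): the comment in _parse_delete refers to MySQL's
    # multiple-table form, where deleted tables are listed before FROM. A minimal sketch:
    #
    #     import sqlglot
    #     tree = sqlglot.parse_one("DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id", read="mysql")
    #     print(tree.args["tables"])  # the tables listed before FROM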

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In Presto we can have VALUES 1, 2, which results in 1 column and 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)
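
    # Editor's note (illustrative): _parse_select also covers DuckDB's leading-FROM
    # form mentioned in the comment above. A minimal sketch:
    #
    #     import sqlglot
    #     sqlglot.parse_one("FROM tbl", read="duckdb").sql()  # 'SELECT * FROM tbl'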

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )
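
    # Editor's note (illustrative): _parse_with/_parse_cte produce an exp.With that
    # _parse_select attaches to the following statement. A minimal sketch:
    #
    #     import sqlglot
    #     tree = sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
    #     print(tree.args["with"])  # an exp.With containing one exp.CTE aliased x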

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
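
    # Editor's note (illustrative): MATCH_RECOGNIZE is exercised mostly through the
    # Snowflake dialect in practice. A minimal sketch, assuming sqlglot is installed:
    #
    #     import sqlglot
    #     sql = """
    #         SELECT * FROM t MATCH_RECOGNIZE (
    #             PARTITION BY a ORDER BY b
    #             PATTERN (x+)
    #             DEFINE x AS price > 10
    #         )
    #     """
    #     tree = sqlglot.parse_one(sql, read="snowflake")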

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
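
    # Editor's note (illustrative): _parse_join folds the method/side/kind tokens into a
    # single exp.Join node. A minimal sketch:
    #
    #     import sqlglot
    #     join = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id").args["joins"][0]
    #     print(join.args.get("side"))  # 'LEFT'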

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
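
    # Editor's note (illustrative): _parse_table_parts assembles catalog.db.table chains.
    # A minimal sketch:
    #
    #     import sqlglot
    #     table = sqlglot.parse_one("SELECT * FROM c.d.t").find(sqlglot.exp.Table)
    #     print(table.catalog, table.db, table.name)  # c d t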

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
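
    # Editor's note (illustrative): _parse_unnest handles the BigQuery/Presto-style
    # table-valued UNNEST. A minimal sketch:
    #
    #     import sqlglot
    #     tree = sqlglot.parse_one("SELECT x FROM UNNEST([1, 2]) AS x", read="bigquery")
    #     print(tree.find(sqlglot.exp.Unnest))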

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = self._parse_primary()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )
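
    # Editor's note (illustrative): _parse_simplified_pivot corresponds to the DuckDB
    # syntax linked above. A minimal sketch:
    #
    #     import sqlglot
    #     tree = sqlglot.parse_one("PIVOT cities ON year USING SUM(population)", read="duckdb")
    #     print(tree.find(sqlglot.exp.Pivot))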

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())
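
    # Editor's note (illustrative): _parse_group collects plain expressions, GROUPING
    # SETS, ROLLUP and CUBE into one exp.Group. A minimal sketch:
    #
    #     import sqlglot
    #     tree = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY GROUPING SETS ((a), ())")
    #     print(tree.args["group"].args["grouping_sets"])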

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)
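
    # Editor's note (illustrative): _parse_ordered applies the dialect's NULL_ORDERING
    # default when NULLS FIRST/LAST is not written out. A minimal sketch:
    #
    #     import sqlglot
    #     ordered = sqlglot.parse_one("SELECT a FROM t ORDER BY a DESC").find(sqlglot.exp.Ordered)
    #     print(ordered.args["desc"], ordered.args["nulls_first"])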
self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3030 "LOCK", "IN", "SHARE", "MODE" 3031 ): 3032 update = False 3033 else: 3034 break 3035 3036 expressions = None 3037 if self._match_text_seq("OF"): 3038 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3039 3040 wait: t.Optional[bool | exp.Expression] = None 3041 if self._match_text_seq("NOWAIT"): 3042 wait = True 3043 elif self._match_text_seq("WAIT"): 3044 wait = self._parse_primary() 3045 elif self._match_text_seq("SKIP", "LOCKED"): 3046 wait = False 3047 3048 locks.append( 3049 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3050 ) 3051 3052 return locks 3053 3054 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3055 if not self._match_set(self.SET_OPERATIONS): 3056 return this 3057 3058 token_type = self._prev.token_type 3059 3060 if token_type == TokenType.UNION: 3061 expression = exp.Union 3062 elif token_type == TokenType.EXCEPT: 3063 expression = exp.Except 3064 else: 3065 expression = exp.Intersect 3066 3067 return self.expression( 3068 expression, 3069 this=this, 3070 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3071 by_name=self._match_text_seq("BY", "NAME"), 3072 expression=self._parse_set_operations(self._parse_select(nested=True)), 3073 ) 3074 3075 def _parse_expression(self) -> t.Optional[exp.Expression]: 3076 return self._parse_alias(self._parse_conjunction()) 3077 3078 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3079 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3080 3081 def _parse_equality(self) -> t.Optional[exp.Expression]: 3082 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3083 3084 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3085 return self._parse_tokens(self._parse_range, self.COMPARISON) 3086 3087 def _parse_range(self) -> t.Optional[exp.Expression]: 3088 this = self._parse_bitwise() 3089 negate = self._match(TokenType.NOT) 3090 3091 if self._match_set(self.RANGE_PARSERS): 3092 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3093 if not expression: 3094 return this 3095 3096 this = expression 3097 elif self._match(TokenType.ISNULL): 3098 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3099 3100 # Postgres supports ISNULL and NOTNULL for conditions. 

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
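
    # Editor's note (illustrative): _parse_is maps IS [NOT] DISTINCT FROM onto the
    # null-safe comparison nodes. A minimal sketch:
    #
    #     import sqlglot
    #     tree = sqlglot.parse_one("SELECT a IS DISTINCT FROM b FROM t")
    #     print(tree.find(sqlglot.exp.NullSafeNEQ))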
a "window side") 3186 unit = None 3187 self._retreat(self._index - 1) 3188 3189 this = exp.Literal.string(parts[0]) 3190 unit = self.expression(exp.Var, this=parts[1]) 3191 3192 return self.expression(exp.Interval, this=this, unit=unit) 3193 3194 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3195 this = self._parse_term() 3196 3197 while True: 3198 if self._match_set(self.BITWISE): 3199 this = self.expression( 3200 self.BITWISE[self._prev.token_type], 3201 this=this, 3202 expression=self._parse_term(), 3203 ) 3204 elif self._match(TokenType.DQMARK): 3205 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3206 elif self._match_pair(TokenType.LT, TokenType.LT): 3207 this = self.expression( 3208 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3209 ) 3210 elif self._match_pair(TokenType.GT, TokenType.GT): 3211 this = self.expression( 3212 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3213 ) 3214 else: 3215 break 3216 3217 return this 3218 3219 def _parse_term(self) -> t.Optional[exp.Expression]: 3220 return self._parse_tokens(self._parse_factor, self.TERM) 3221 3222 def _parse_factor(self) -> t.Optional[exp.Expression]: 3223 return self._parse_tokens(self._parse_unary, self.FACTOR) 3224 3225 def _parse_unary(self) -> t.Optional[exp.Expression]: 3226 if self._match_set(self.UNARY_PARSERS): 3227 return self.UNARY_PARSERS[self._prev.token_type](self) 3228 return self._parse_at_time_zone(self._parse_type()) 3229 3230 def _parse_type(self) -> t.Optional[exp.Expression]: 3231 interval = self._parse_interval() 3232 if interval: 3233 return interval 3234 3235 index = self._index 3236 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3237 this = self._parse_column() 3238 3239 if data_type: 3240 if isinstance(this, exp.Literal): 3241 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3242 if parser: 3243 return parser(self, this, data_type) 3244 return self.expression(exp.Cast, this=this, to=data_type) 3245 if not data_type.expressions: 3246 self._retreat(index) 3247 return self._parse_column() 3248 return self._parse_column_ops(data_type) 3249 3250 return this 3251 3252 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3253 this = self._parse_type() 3254 if not this: 3255 return None 3256 3257 return self.expression( 3258 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3259 ) 3260 3261 def _parse_types( 3262 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3263 ) -> t.Optional[exp.Expression]: 3264 index = self._index 3265 3266 prefix = self._match_text_seq("SYSUDTLIB", ".") 3267 3268 if not self._match_set(self.TYPE_TOKENS): 3269 identifier = allow_identifiers and self._parse_id_var( 3270 any_token=False, tokens=(TokenType.VAR,) 3271 ) 3272 3273 if identifier: 3274 tokens = self._tokenizer.tokenize(identifier.name) 3275 3276 if len(tokens) != 1: 3277 self.raise_error("Unexpected identifier", self._prev) 3278 3279 if tokens[0].token_type in self.TYPE_TOKENS: 3280 self._prev = tokens[0] 3281 elif self.SUPPORTS_USER_DEFINED_TYPES: 3282 type_name = identifier.name 3283 3284 while self._match(TokenType.DOT): 3285 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3286 3287 return exp.DataType.build(type_name, udt=True) 3288 else: 3289 return None 3290 else: 3291 return None 3292 3293 type_token = self._prev.token_type 3294 3295 if type_token == TokenType.PSEUDO_TYPE: 3296 return self.expression(exp.PseudoType, 

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
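
    # Editor's note (illustrative): _parse_types covers parameterized, nested and
    # bracketed types. A minimal sketch:
    #
    #     import sqlglot
    #     cast = sqlglot.parse_one("SELECT CAST(x AS ARRAY<INT>)").find(sqlglot.exp.Cast)
    #     print(cast.to)  # an exp.DataType with this=Type.ARRAY and nested=True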

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None
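
    # Editor's note (illustrative): the DCOLON branch in _parse_column_ops is what turns
    # Postgres-style x::int into a cast. A minimal sketch:
    #
    #     import sqlglot
    #     print(sqlglot.parse_one("SELECT x::INT").find(sqlglot.exp.Cast).sql())  # CAST(x AS INT)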
tokens: t.Optional[t.Collection[TokenType]] = None, 3517 anonymous_func: bool = False, 3518 ) -> t.Optional[exp.Expression]: 3519 return ( 3520 self._parse_primary() 3521 or self._parse_function(anonymous=anonymous_func) 3522 or self._parse_id_var(any_token=any_token, tokens=tokens) 3523 ) 3524 3525 def _parse_function( 3526 self, 3527 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3528 anonymous: bool = False, 3529 optional_parens: bool = True, 3530 ) -> t.Optional[exp.Expression]: 3531 if not self._curr: 3532 return None 3533 3534 token_type = self._curr.token_type 3535 this = self._curr.text 3536 upper = this.upper() 3537 3538 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3539 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3540 self._advance() 3541 return parser(self) 3542 3543 if not self._next or self._next.token_type != TokenType.L_PAREN: 3544 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3545 self._advance() 3546 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3547 3548 return None 3549 3550 if token_type not in self.FUNC_TOKENS: 3551 return None 3552 3553 self._advance(2) 3554 3555 parser = self.FUNCTION_PARSERS.get(upper) 3556 if parser and not anonymous: 3557 this = parser(self) 3558 else: 3559 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3560 3561 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3562 this = self.expression(subquery_predicate, this=self._parse_select()) 3563 self._match_r_paren() 3564 return this 3565 3566 if functions is None: 3567 functions = self.FUNCTIONS 3568 3569 function = functions.get(upper) 3570 3571 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3572 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3573 3574 if function and not anonymous: 3575 func = self.validate_expression(function(args), args) 3576 if not self.NORMALIZE_FUNCTIONS: 3577 func.meta["name"] = this 3578 this = func 3579 else: 3580 this = self.expression(exp.Anonymous, this=this, expressions=args) 3581 3582 self._match_r_paren(this) 3583 return self._parse_window(this) 3584 3585 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3586 return self._parse_column_def(self._parse_id_var()) 3587 3588 def _parse_user_defined_function( 3589 self, kind: t.Optional[TokenType] = None 3590 ) -> t.Optional[exp.Expression]: 3591 this = self._parse_id_var() 3592 3593 while self._match(TokenType.DOT): 3594 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3595 3596 if not self._match(TokenType.L_PAREN): 3597 return this 3598 3599 expressions = self._parse_csv(self._parse_function_parameter) 3600 self._match_r_paren() 3601 return self.expression( 3602 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3603 ) 3604 3605 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3606 literal = self._parse_primary() 3607 if literal: 3608 return self.expression(exp.Introducer, this=token.text, expression=literal) 3609 3610 return self.expression(exp.Identifier, this=token.text) 3611 3612 def _parse_session_parameter(self) -> exp.SessionParameter: 3613 kind = None 3614 this = self._parse_id_var() or self._parse_primary() 3615 3616 if this and self._match(TokenType.DOT): 3617 kind = this.name 3618 this = self._parse_var() or self._parse_primary() 3619 3620 return self.expression(exp.SessionParameter, this=this, kind=kind) 3621 3622 def _parse_lambda(self, alias: bool = False) -> 
t.Optional[exp.Expression]: 3623 index = self._index 3624 3625 if self._match(TokenType.L_PAREN): 3626 expressions = t.cast( 3627 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3628 ) 3629 3630 if not self._match(TokenType.R_PAREN): 3631 self._retreat(index) 3632 else: 3633 expressions = [self._parse_id_var()] 3634 3635 if self._match_set(self.LAMBDAS): 3636 return self.LAMBDAS[self._prev.token_type](self, expressions) 3637 3638 self._retreat(index) 3639 3640 this: t.Optional[exp.Expression] 3641 3642 if self._match(TokenType.DISTINCT): 3643 this = self.expression( 3644 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3645 ) 3646 else: 3647 this = self._parse_select_or_expression(alias=alias) 3648 3649 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3650 3651 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3652 index = self._index 3653 3654 if not self.errors: 3655 try: 3656 if self._parse_select(nested=True): 3657 return this 3658 except ParseError: 3659 pass 3660 finally: 3661 self.errors.clear() 3662 self._retreat(index) 3663 3664 if not self._match(TokenType.L_PAREN): 3665 return this 3666 3667 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3668 3669 self._match_r_paren() 3670 return self.expression(exp.Schema, this=this, expressions=args) 3671 3672 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3673 return self._parse_column_def(self._parse_field(any_token=True)) 3674 3675 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3676 # column defs are not really columns, they're identifiers 3677 if isinstance(this, exp.Column): 3678 this = this.this 3679 3680 kind = self._parse_types(schema=True) 3681 3682 if self._match_text_seq("FOR", "ORDINALITY"): 3683 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3684 3685 constraints: t.List[exp.Expression] = [] 3686 3687 if not kind and self._match(TokenType.ALIAS): 3688 constraints.append( 3689 self.expression( 3690 exp.ComputedColumnConstraint, 3691 this=self._parse_conjunction(), 3692 persisted=self._match_text_seq("PERSISTED"), 3693 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3694 ) 3695 ) 3696 3697 while True: 3698 constraint = self._parse_column_constraint() 3699 if not constraint: 3700 break 3701 constraints.append(constraint) 3702 3703 if not kind and not constraints: 3704 return this 3705 3706 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3707 3708 def _parse_auto_increment( 3709 self, 3710 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3711 start = None 3712 increment = None 3713 3714 if self._match(TokenType.L_PAREN, advance=False): 3715 args = self._parse_wrapped_csv(self._parse_bitwise) 3716 start = seq_get(args, 0) 3717 increment = seq_get(args, 1) 3718 elif self._match_text_seq("START"): 3719 start = self._parse_bitwise() 3720 self._match_text_seq("INCREMENT") 3721 increment = self._parse_bitwise() 3722 3723 if start and increment: 3724 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3725 3726 return exp.AutoIncrementColumnConstraint() 3727 3728 def _parse_compress(self) -> exp.CompressColumnConstraint: 3729 if self._match(TokenType.L_PAREN, advance=False): 3730 return self.expression( 3731 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3732 ) 3733 
3734 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3735 3736 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3737 if self._match_text_seq("BY", "DEFAULT"): 3738 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3739 this = self.expression( 3740 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3741 ) 3742 else: 3743 self._match_text_seq("ALWAYS") 3744 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3745 3746 self._match(TokenType.ALIAS) 3747 identity = self._match_text_seq("IDENTITY") 3748 3749 if self._match(TokenType.L_PAREN): 3750 if self._match(TokenType.START_WITH): 3751 this.set("start", self._parse_bitwise()) 3752 if self._match_text_seq("INCREMENT", "BY"): 3753 this.set("increment", self._parse_bitwise()) 3754 if self._match_text_seq("MINVALUE"): 3755 this.set("minvalue", self._parse_bitwise()) 3756 if self._match_text_seq("MAXVALUE"): 3757 this.set("maxvalue", self._parse_bitwise()) 3758 3759 if self._match_text_seq("CYCLE"): 3760 this.set("cycle", True) 3761 elif self._match_text_seq("NO", "CYCLE"): 3762 this.set("cycle", False) 3763 3764 if not identity: 3765 this.set("expression", self._parse_bitwise()) 3766 3767 self._match_r_paren() 3768 3769 return this 3770 3771 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3772 self._match_text_seq("LENGTH") 3773 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3774 3775 def _parse_not_constraint( 3776 self, 3777 ) -> t.Optional[exp.Expression]: 3778 if self._match_text_seq("NULL"): 3779 return self.expression(exp.NotNullColumnConstraint) 3780 if self._match_text_seq("CASESPECIFIC"): 3781 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3782 if self._match_text_seq("FOR", "REPLICATION"): 3783 return self.expression(exp.NotForReplicationColumnConstraint) 3784 return None 3785 3786 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3787 if self._match(TokenType.CONSTRAINT): 3788 this = self._parse_id_var() 3789 else: 3790 this = None 3791 3792 if self._match_texts(self.CONSTRAINT_PARSERS): 3793 return self.expression( 3794 exp.ColumnConstraint, 3795 this=this, 3796 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3797 ) 3798 3799 return this 3800 3801 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3802 if not self._match(TokenType.CONSTRAINT): 3803 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3804 3805 this = self._parse_id_var() 3806 expressions = [] 3807 3808 while True: 3809 constraint = self._parse_unnamed_constraint() or self._parse_function() 3810 if not constraint: 3811 break 3812 expressions.append(constraint) 3813 3814 return self.expression(exp.Constraint, this=this, expressions=expressions) 3815 3816 def _parse_unnamed_constraint( 3817 self, constraints: t.Optional[t.Collection[str]] = None 3818 ) -> t.Optional[exp.Expression]: 3819 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3820 return None 3821 3822 constraint = self._prev.text.upper() 3823 if constraint not in self.CONSTRAINT_PARSERS: 3824 self.raise_error(f"No parser found for schema constraint {constraint}.") 3825 3826 return self.CONSTRAINT_PARSERS[constraint](self) 3827 3828 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3829 self._match_text_seq("KEY") 3830 return self.expression( 3831 exp.UniqueColumnConstraint, 3832 
this=self._parse_schema(self._parse_id_var(any_token=False)), 3833 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3834 ) 3835 3836 def _parse_key_constraint_options(self) -> t.List[str]: 3837 options = [] 3838 while True: 3839 if not self._curr: 3840 break 3841 3842 if self._match(TokenType.ON): 3843 action = None 3844 on = self._advance_any() and self._prev.text 3845 3846 if self._match_text_seq("NO", "ACTION"): 3847 action = "NO ACTION" 3848 elif self._match_text_seq("CASCADE"): 3849 action = "CASCADE" 3850 elif self._match_pair(TokenType.SET, TokenType.NULL): 3851 action = "SET NULL" 3852 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3853 action = "SET DEFAULT" 3854 else: 3855 self.raise_error("Invalid key constraint") 3856 3857 options.append(f"ON {on} {action}") 3858 elif self._match_text_seq("NOT", "ENFORCED"): 3859 options.append("NOT ENFORCED") 3860 elif self._match_text_seq("DEFERRABLE"): 3861 options.append("DEFERRABLE") 3862 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3863 options.append("INITIALLY DEFERRED") 3864 elif self._match_text_seq("NORELY"): 3865 options.append("NORELY") 3866 elif self._match_text_seq("MATCH", "FULL"): 3867 options.append("MATCH FULL") 3868 else: 3869 break 3870 3871 return options 3872 3873 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3874 if match and not self._match(TokenType.REFERENCES): 3875 return None 3876 3877 expressions = None 3878 this = self._parse_table(schema=True) 3879 options = self._parse_key_constraint_options() 3880 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3881 3882 def _parse_foreign_key(self) -> exp.ForeignKey: 3883 expressions = self._parse_wrapped_id_vars() 3884 reference = self._parse_references() 3885 options = {} 3886 3887 while self._match(TokenType.ON): 3888 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3889 self.raise_error("Expected DELETE or UPDATE") 3890 3891 kind = self._prev.text.lower() 3892 3893 if self._match_text_seq("NO", "ACTION"): 3894 action = "NO ACTION" 3895 elif self._match(TokenType.SET): 3896 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3897 action = "SET " + self._prev.text.upper() 3898 else: 3899 self._advance() 3900 action = self._prev.text.upper() 3901 3902 options[kind] = action 3903 3904 return self.expression( 3905 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3906 ) 3907 3908 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 3909 return self._parse_field() 3910 3911 def _parse_primary_key( 3912 self, wrapped_optional: bool = False, in_props: bool = False 3913 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3914 desc = ( 3915 self._match_set((TokenType.ASC, TokenType.DESC)) 3916 and self._prev.token_type == TokenType.DESC 3917 ) 3918 3919 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3920 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3921 3922 expressions = self._parse_wrapped_csv( 3923 self._parse_primary_key_part, optional=wrapped_optional 3924 ) 3925 options = self._parse_key_constraint_options() 3926 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3927 3928 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3929 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3930 return this 3931 3932 bracket_kind = self._prev.token_type 3933 3934 if 
self._match(TokenType.COLON): 3935 expressions: t.List[exp.Expression] = [ 3936 self.expression(exp.Slice, expression=self._parse_conjunction()) 3937 ] 3938 else: 3939 expressions = self._parse_csv( 3940 lambda: self._parse_slice( 3941 self._parse_alias(self._parse_conjunction(), explicit=True) 3942 ) 3943 ) 3944 3945 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3946 if bracket_kind == TokenType.L_BRACE: 3947 this = self.expression(exp.Struct, expressions=expressions) 3948 elif not this or this.name.upper() == "ARRAY": 3949 this = self.expression(exp.Array, expressions=expressions) 3950 else: 3951 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3952 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3953 3954 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3955 self.raise_error("Expected ]") 3956 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3957 self.raise_error("Expected }") 3958 3959 self._add_comments(this) 3960 return self._parse_bracket(this) 3961 3962 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3963 if self._match(TokenType.COLON): 3964 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3965 return this 3966 3967 def _parse_case(self) -> t.Optional[exp.Expression]: 3968 ifs = [] 3969 default = None 3970 3971 comments = self._prev_comments 3972 expression = self._parse_conjunction() 3973 3974 while self._match(TokenType.WHEN): 3975 this = self._parse_conjunction() 3976 self._match(TokenType.THEN) 3977 then = self._parse_conjunction() 3978 ifs.append(self.expression(exp.If, this=this, true=then)) 3979 3980 if self._match(TokenType.ELSE): 3981 default = self._parse_conjunction() 3982 3983 if not self._match(TokenType.END): 3984 self.raise_error("Expected END after CASE", self._prev) 3985 3986 return self._parse_window( 3987 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 3988 ) 3989 3990 def _parse_if(self) -> t.Optional[exp.Expression]: 3991 if self._match(TokenType.L_PAREN): 3992 args = self._parse_csv(self._parse_conjunction) 3993 this = self.validate_expression(exp.If.from_arg_list(args), args) 3994 self._match_r_paren() 3995 else: 3996 index = self._index - 1 3997 condition = self._parse_conjunction() 3998 3999 if not condition: 4000 self._retreat(index) 4001 return None 4002 4003 self._match(TokenType.THEN) 4004 true = self._parse_conjunction() 4005 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4006 self._match(TokenType.END) 4007 this = self.expression(exp.If, this=condition, true=true, false=false) 4008 4009 return self._parse_window(this) 4010 4011 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4012 if not self._match_text_seq("VALUE", "FOR"): 4013 self._retreat(self._index - 1) 4014 return None 4015 4016 return self.expression( 4017 exp.NextValueFor, 4018 this=self._parse_column(), 4019 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4020 ) 4021 4022 def _parse_extract(self) -> exp.Extract: 4023 this = self._parse_function() or self._parse_var() or self._parse_type() 4024 4025 if self._match(TokenType.FROM): 4026 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4027 4028 if not self._match(TokenType.COMMA): 4029 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4030 4031 return 
self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4032 4033 def _parse_any_value(self) -> exp.AnyValue: 4034 this = self._parse_lambda() 4035 is_max = None 4036 having = None 4037 4038 if self._match(TokenType.HAVING): 4039 self._match_texts(("MAX", "MIN")) 4040 is_max = self._prev.text == "MAX" 4041 having = self._parse_column() 4042 4043 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4044 4045 def _parse_cast(self, strict: bool) -> exp.Expression: 4046 this = self._parse_conjunction() 4047 4048 if not self._match(TokenType.ALIAS): 4049 if self._match(TokenType.COMMA): 4050 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4051 4052 self.raise_error("Expected AS after CAST") 4053 4054 fmt = None 4055 to = self._parse_types() 4056 4057 if not to: 4058 self.raise_error("Expected TYPE after CAST") 4059 elif isinstance(to, exp.Identifier): 4060 to = exp.DataType.build(to.name, udt=True) 4061 elif to.this == exp.DataType.Type.CHAR: 4062 if self._match(TokenType.CHARACTER_SET): 4063 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4064 elif self._match(TokenType.FORMAT): 4065 fmt_string = self._parse_string() 4066 fmt = self._parse_at_time_zone(fmt_string) 4067 4068 if to.this in exp.DataType.TEMPORAL_TYPES: 4069 this = self.expression( 4070 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4071 this=this, 4072 format=exp.Literal.string( 4073 format_time( 4074 fmt_string.this if fmt_string else "", 4075 self.FORMAT_MAPPING or self.TIME_MAPPING, 4076 self.FORMAT_TRIE or self.TIME_TRIE, 4077 ) 4078 ), 4079 ) 4080 4081 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4082 this.set("zone", fmt.args["zone"]) 4083 4084 return this 4085 4086 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 4087 4088 def _parse_concat(self) -> t.Optional[exp.Expression]: 4089 args = self._parse_csv(self._parse_conjunction) 4090 if self.CONCAT_NULL_OUTPUTS_STRING: 4091 args = self._ensure_string_if_null(args) 4092 4093 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4094 # we find such a call we replace it with its argument. 4095 if len(args) == 1: 4096 return args[0] 4097 4098 return self.expression( 4099 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4100 ) 4101 4102 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4103 args = self._parse_csv(self._parse_conjunction) 4104 if len(args) < 2: 4105 return self.expression(exp.ConcatWs, expressions=args) 4106 delim, *values = args 4107 if self.CONCAT_NULL_OUTPUTS_STRING: 4108 values = self._ensure_string_if_null(values) 4109 4110 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4111 4112 def _parse_string_agg(self) -> exp.Expression: 4113 if self._match(TokenType.DISTINCT): 4114 args: t.List[t.Optional[exp.Expression]] = [ 4115 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4116 ] 4117 if self._match(TokenType.COMMA): 4118 args.extend(self._parse_csv(self._parse_conjunction)) 4119 else: 4120 args = self._parse_csv(self._parse_conjunction) # type: ignore 4121 4122 index = self._index 4123 if not self._match(TokenType.R_PAREN) and args: 4124 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4125 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 4126 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4127 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4128 4129 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4130 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4131 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4132 if not self._match_text_seq("WITHIN", "GROUP"): 4133 self._retreat(index) 4134 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4135 4136 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4137 order = self._parse_order(this=seq_get(args, 0)) 4138 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4139 4140 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4141 this = self._parse_bitwise() 4142 4143 if self._match(TokenType.USING): 4144 to: t.Optional[exp.Expression] = self.expression( 4145 exp.CharacterSet, this=self._parse_var() 4146 ) 4147 elif self._match(TokenType.COMMA): 4148 to = self._parse_types() 4149 else: 4150 to = None 4151 4152 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4153 4154 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4155 """ 4156 There are generally two variants of the DECODE function: 4157 4158 - DECODE(bin, charset) 4159 - DECODE(expression, search, result [, search, result] ... [, default]) 4160 4161 The second variant will always be parsed into a CASE expression. Note that NULL 4162 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4163 instead of relying on pattern matching. 
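For example (an illustrative call, not from the source), DECODE(x, 1, 'one', 2, 'two', 'other') parses into CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END: paired arguments become WHEN branches, a trailing unpaired argument becomes the ELSE default, and a NULL search value compiles to an x IS NULL test.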
4164 """ 4165 args = self._parse_csv(self._parse_conjunction) 4166 4167 if len(args) < 3: 4168 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4169 4170 expression, *expressions = args 4171 if not expression: 4172 return None 4173 4174 ifs = [] 4175 for search, result in zip(expressions[::2], expressions[1::2]): 4176 if not search or not result: 4177 return None 4178 4179 if isinstance(search, exp.Literal): 4180 ifs.append( 4181 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4182 ) 4183 elif isinstance(search, exp.Null): 4184 ifs.append( 4185 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4186 ) 4187 else: 4188 cond = exp.or_( 4189 exp.EQ(this=expression.copy(), expression=search), 4190 exp.and_( 4191 exp.Is(this=expression.copy(), expression=exp.Null()), 4192 exp.Is(this=search.copy(), expression=exp.Null()), 4193 copy=False, 4194 ), 4195 copy=False, 4196 ) 4197 ifs.append(exp.If(this=cond, true=result)) 4198 4199 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4200 4201 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4202 self._match_text_seq("KEY") 4203 key = self._parse_column() 4204 self._match_set((TokenType.COLON, TokenType.COMMA)) 4205 self._match_text_seq("VALUE") 4206 value = self._parse_bitwise() 4207 4208 if not key and not value: 4209 return None 4210 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4211 4212 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4213 if not this or not self._match_text_seq("FORMAT", "JSON"): 4214 return this 4215 4216 return self.expression(exp.FormatJson, this=this) 4217 4218 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4219 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4220 for value in values: 4221 if self._match_text_seq(value, "ON", on): 4222 return f"{value} ON {on}" 4223 4224 return None 4225 4226 def _parse_json_object(self) -> exp.JSONObject: 4227 star = self._parse_star() 4228 expressions = ( 4229 [star] 4230 if star 4231 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4232 ) 4233 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4234 4235 unique_keys = None 4236 if self._match_text_seq("WITH", "UNIQUE"): 4237 unique_keys = True 4238 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4239 unique_keys = False 4240 4241 self._match_text_seq("KEYS") 4242 4243 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4244 self._parse_type() 4245 ) 4246 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4247 4248 return self.expression( 4249 exp.JSONObject, 4250 expressions=expressions, 4251 null_handling=null_handling, 4252 unique_keys=unique_keys, 4253 return_type=return_type, 4254 encoding=encoding, 4255 ) 4256 4257 def _parse_logarithm(self) -> exp.Func: 4258 # Default argument order is base, expression 4259 args = self._parse_csv(self._parse_range) 4260 4261 if len(args) > 1: 4262 if not self.LOG_BASE_FIRST: 4263 args.reverse() 4264 return exp.Log.from_arg_list(args) 4265 4266 return self.expression( 4267 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4268 ) 4269 4270 def _parse_match_against(self) -> exp.MatchAgainst: 4271 expressions = self._parse_csv(self._parse_column) 4272 4273 self._match_text_seq(")", "AGAINST", "(") 4274 4275 this = self._parse_string() 4276 4277 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4278 modifier = "IN NATURAL LANGUAGE MODE" 4279 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4280 modifier = f"{modifier} WITH QUERY EXPANSION" 4281 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4282 modifier = "IN BOOLEAN MODE" 4283 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4284 modifier = "WITH QUERY EXPANSION" 4285 else: 4286 modifier = None 4287 4288 return self.expression( 4289 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4290 ) 4291 4292 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4293 def _parse_open_json(self) -> exp.OpenJSON: 4294 this = self._parse_bitwise() 4295 path = self._match(TokenType.COMMA) and self._parse_string() 4296 4297 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4298 this = self._parse_field(any_token=True) 4299 kind = self._parse_types() 4300 path = self._parse_string() 4301 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4302 4303 return self.expression( 4304 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4305 ) 4306 4307 expressions = None 4308 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4309 self._match_l_paren() 4310 expressions = self._parse_csv(_parse_open_json_column_def) 4311 4312 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4313 4314 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4315 args = self._parse_csv(self._parse_bitwise) 4316 4317 if self._match(TokenType.IN): 4318 return self.expression( 4319 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4320 ) 4321 4322 if haystack_first: 4323 haystack = seq_get(args, 0) 4324 needle = seq_get(args, 1) 4325 else: 4326 needle = seq_get(args, 0) 
4327 haystack = seq_get(args, 1) 4328 4329 return self.expression( 4330 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4331 ) 4332 4333 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4334 args = self._parse_csv(self._parse_table) 4335 return exp.JoinHint(this=func_name.upper(), expressions=args) 4336 4337 def _parse_substring(self) -> exp.Substring: 4338 # Postgres supports the form: substring(string [from int] [for int]) 4339 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4340 4341 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4342 4343 if self._match(TokenType.FROM): 4344 args.append(self._parse_bitwise()) 4345 if self._match(TokenType.FOR): 4346 args.append(self._parse_bitwise()) 4347 4348 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4349 4350 def _parse_trim(self) -> exp.Trim: 4351 # https://www.w3resource.com/sql/character-functions/trim.php 4352 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4353 4354 position = None 4355 collation = None 4356 4357 if self._match_texts(self.TRIM_TYPES): 4358 position = self._prev.text.upper() 4359 4360 expression = self._parse_bitwise() 4361 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4362 this = self._parse_bitwise() 4363 else: 4364 this = expression 4365 expression = None 4366 4367 if self._match(TokenType.COLLATE): 4368 collation = self._parse_bitwise() 4369 4370 return self.expression( 4371 exp.Trim, this=this, position=position, expression=expression, collation=collation 4372 ) 4373 4374 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4375 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4376 4377 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4378 return self._parse_window(self._parse_id_var(), alias=True) 4379 4380 def _parse_respect_or_ignore_nulls( 4381 self, this: t.Optional[exp.Expression] 4382 ) -> t.Optional[exp.Expression]: 4383 if self._match_text_seq("IGNORE", "NULLS"): 4384 return self.expression(exp.IgnoreNulls, this=this) 4385 if self._match_text_seq("RESPECT", "NULLS"): 4386 return self.expression(exp.RespectNulls, this=this) 4387 return this 4388 4389 def _parse_window( 4390 self, this: t.Optional[exp.Expression], alias: bool = False 4391 ) -> t.Optional[exp.Expression]: 4392 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4393 self._match(TokenType.WHERE) 4394 this = self.expression( 4395 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4396 ) 4397 self._match_r_paren() 4398 4399 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4400 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4401 if self._match_text_seq("WITHIN", "GROUP"): 4402 order = self._parse_wrapped(self._parse_order) 4403 this = self.expression(exp.WithinGroup, this=this, expression=order) 4404 4405 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4406 # Some dialects choose to implement and some do not. 4407 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4408 4409 # There is some code above in _parse_lambda that handles 4410 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4411 4412 # The below changes handle 4413 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
4414 4415 # Oracle allows both formats 4416 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4417 # and Snowflake chose to do the same for familiarity 4418 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4419 this = self._parse_respect_or_ignore_nulls(this) 4420 4421 # bigquery select from window x AS (partition by ...) 4422 if alias: 4423 over = None 4424 self._match(TokenType.ALIAS) 4425 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4426 return this 4427 else: 4428 over = self._prev.text.upper() 4429 4430 if not self._match(TokenType.L_PAREN): 4431 return self.expression( 4432 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4433 ) 4434 4435 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4436 4437 first = self._match(TokenType.FIRST) 4438 if self._match_text_seq("LAST"): 4439 first = False 4440 4441 partition, order = self._parse_partition_and_order() 4442 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4443 4444 if kind: 4445 self._match(TokenType.BETWEEN) 4446 start = self._parse_window_spec() 4447 self._match(TokenType.AND) 4448 end = self._parse_window_spec() 4449 4450 spec = self.expression( 4451 exp.WindowSpec, 4452 kind=kind, 4453 start=start["value"], 4454 start_side=start["side"], 4455 end=end["value"], 4456 end_side=end["side"], 4457 ) 4458 else: 4459 spec = None 4460 4461 self._match_r_paren() 4462 4463 window = self.expression( 4464 exp.Window, 4465 this=this, 4466 partition_by=partition, 4467 order=order, 4468 spec=spec, 4469 alias=window_alias, 4470 over=over, 4471 first=first, 4472 ) 4473 4474 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
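# e.g. (illustrative, Oracle dialect, where KEEP is in WINDOW_BEFORE_PAREN_TOKENS):
# MAX(sal) KEEP (DENSE_RANK FIRST ORDER BY hiredate) OVER (PARTITION BY deptno)
# The KEEP (...) window is parsed first; the trailing OVER matches below and
# triggers the recursive _parse_window call, wrapping one window in the other.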
4475 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4476 return self._parse_window(window, alias=alias) 4477 4478 return window 4479 4480 def _parse_partition_and_order( 4481 self, 4482 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4483 return self._parse_partition_by(), self._parse_order() 4484 4485 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4486 self._match(TokenType.BETWEEN) 4487 4488 return { 4489 "value": ( 4490 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4491 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4492 or self._parse_bitwise() 4493 ), 4494 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4495 } 4496 4497 def _parse_alias( 4498 self, this: t.Optional[exp.Expression], explicit: bool = False 4499 ) -> t.Optional[exp.Expression]: 4500 any_token = self._match(TokenType.ALIAS) 4501 4502 if explicit and not any_token: 4503 return this 4504 4505 if self._match(TokenType.L_PAREN): 4506 aliases = self.expression( 4507 exp.Aliases, 4508 this=this, 4509 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4510 ) 4511 self._match_r_paren(aliases) 4512 return aliases 4513 4514 alias = self._parse_id_var(any_token) 4515 4516 if alias: 4517 return self.expression(exp.Alias, this=this, alias=alias) 4518 4519 return this 4520 4521 def _parse_id_var( 4522 self, 4523 any_token: bool = True, 4524 tokens: t.Optional[t.Collection[TokenType]] = None, 4525 ) -> t.Optional[exp.Expression]: 4526 identifier = self._parse_identifier() 4527 4528 if identifier: 4529 return identifier 4530 4531 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4532 quoted = self._prev.token_type == TokenType.STRING 4533 return exp.Identifier(this=self._prev.text, quoted=quoted) 4534 4535 return None 4536 4537 def _parse_string(self) -> t.Optional[exp.Expression]: 4538 if self._match(TokenType.STRING): 4539 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4540 return self._parse_placeholder() 4541 4542 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4543 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4544 4545 def _parse_number(self) -> t.Optional[exp.Expression]: 4546 if self._match(TokenType.NUMBER): 4547 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4548 return self._parse_placeholder() 4549 4550 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4551 if self._match(TokenType.IDENTIFIER): 4552 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4553 return self._parse_placeholder() 4554 4555 def _parse_var( 4556 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4557 ) -> t.Optional[exp.Expression]: 4558 if ( 4559 (any_token and self._advance_any()) 4560 or self._match(TokenType.VAR) 4561 or (self._match_set(tokens) if tokens else False) 4562 ): 4563 return self.expression(exp.Var, this=self._prev.text) 4564 return self._parse_placeholder() 4565 4566 def _advance_any(self) -> t.Optional[Token]: 4567 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4568 self._advance() 4569 return self._prev 4570 return None 4571 4572 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4573 return self._parse_var() or self._parse_string() 4574 4575 def _parse_null(self) -> t.Optional[exp.Expression]: 4576 if self._match(TokenType.NULL): 4577 return 
self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4578 return self._parse_placeholder() 4579 4580 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4581 if self._match(TokenType.TRUE): 4582 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4583 if self._match(TokenType.FALSE): 4584 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4585 return self._parse_placeholder() 4586 4587 def _parse_star(self) -> t.Optional[exp.Expression]: 4588 if self._match(TokenType.STAR): 4589 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4590 return self._parse_placeholder() 4591 4592 def _parse_parameter(self) -> exp.Parameter: 4593 wrapped = self._match(TokenType.L_BRACE) 4594 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4595 self._match(TokenType.R_BRACE) 4596 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4597 4598 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4599 if self._match_set(self.PLACEHOLDER_PARSERS): 4600 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4601 if placeholder: 4602 return placeholder 4603 self._advance(-1) 4604 return None 4605 4606 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4607 if not self._match(TokenType.EXCEPT): 4608 return None 4609 if self._match(TokenType.L_PAREN, advance=False): 4610 return self._parse_wrapped_csv(self._parse_column) 4611 return self._parse_csv(self._parse_column) 4612 4613 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4614 if not self._match(TokenType.REPLACE): 4615 return None 4616 if self._match(TokenType.L_PAREN, advance=False): 4617 return self._parse_wrapped_csv(self._parse_expression) 4618 return self._parse_expressions() 4619 4620 def _parse_csv( 4621 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4622 ) -> t.List[exp.Expression]: 4623 parse_result = parse_method() 4624 items = [parse_result] if parse_result is not None else [] 4625 4626 while self._match(sep): 4627 self._add_comments(parse_result) 4628 parse_result = parse_method() 4629 if parse_result is not None: 4630 items.append(parse_result) 4631 4632 return items 4633 4634 def _parse_tokens( 4635 self, parse_method: t.Callable, expressions: t.Dict 4636 ) -> t.Optional[exp.Expression]: 4637 this = parse_method() 4638 4639 while self._match_set(expressions): 4640 this = self.expression( 4641 expressions[self._prev.token_type], 4642 this=this, 4643 comments=self._prev_comments, 4644 expression=parse_method(), 4645 ) 4646 4647 return this 4648 4649 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4650 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4651 4652 def _parse_wrapped_csv( 4653 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4654 ) -> t.List[exp.Expression]: 4655 return self._parse_wrapped( 4656 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4657 ) 4658 4659 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4660 wrapped = self._match(TokenType.L_PAREN) 4661 if not wrapped and not optional: 4662 self.raise_error("Expecting (") 4663 parse_result = parse_method() 4664 if wrapped: 4665 self._match_r_paren() 4666 return parse_result 4667 4668 def _parse_expressions(self) -> t.List[exp.Expression]: 4669 return self._parse_csv(self._parse_expression) 4670 4671 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4672 
return self._parse_select() or self._parse_set_operations( 4673 self._parse_expression() if alias else self._parse_conjunction() 4674 ) 4675 4676 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4677 return self._parse_query_modifiers( 4678 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4679 ) 4680 4681 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4682 this = None 4683 if self._match_texts(self.TRANSACTION_KIND): 4684 this = self._prev.text 4685 4686 self._match_texts({"TRANSACTION", "WORK"}) 4687 4688 modes = [] 4689 while True: 4690 mode = [] 4691 while self._match(TokenType.VAR): 4692 mode.append(self._prev.text) 4693 4694 if mode: 4695 modes.append(" ".join(mode)) 4696 if not self._match(TokenType.COMMA): 4697 break 4698 4699 return self.expression(exp.Transaction, this=this, modes=modes) 4700 4701 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4702 chain = None 4703 savepoint = None 4704 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4705 4706 self._match_texts({"TRANSACTION", "WORK"}) 4707 4708 if self._match_text_seq("TO"): 4709 self._match_text_seq("SAVEPOINT") 4710 savepoint = self._parse_id_var() 4711 4712 if self._match(TokenType.AND): 4713 chain = not self._match_text_seq("NO") 4714 self._match_text_seq("CHAIN") 4715 4716 if is_rollback: 4717 return self.expression(exp.Rollback, savepoint=savepoint) 4718 4719 return self.expression(exp.Commit, chain=chain) 4720 4721 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4722 if not self._match_text_seq("ADD"): 4723 return None 4724 4725 self._match(TokenType.COLUMN) 4726 exists_column = self._parse_exists(not_=True) 4727 expression = self._parse_field_def() 4728 4729 if expression: 4730 expression.set("exists", exists_column) 4731 4732 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4733 if self._match_texts(("FIRST", "AFTER")): 4734 position = self._prev.text 4735 column_position = self.expression( 4736 exp.ColumnPosition, this=self._parse_column(), position=position 4737 ) 4738 expression.set("position", column_position) 4739 4740 return expression 4741 4742 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4743 drop = self._match(TokenType.DROP) and self._parse_drop() 4744 if drop and not isinstance(drop, exp.Command): 4745 drop.set("kind", drop.args.get("kind", "COLUMN")) 4746 return drop 4747 4748 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4749 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4750 return self.expression( 4751 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4752 ) 4753 4754 def _parse_add_constraint(self) -> exp.AddConstraint: 4755 this = None 4756 kind = self._prev.token_type 4757 4758 if kind == TokenType.CONSTRAINT: 4759 this = self._parse_id_var() 4760 4761 if self._match_text_seq("CHECK"): 4762 expression = self._parse_wrapped(self._parse_conjunction) 4763 enforced = self._match_text_seq("ENFORCED") 4764 4765 return self.expression( 4766 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4767 ) 4768 4769 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4770 expression = self._parse_foreign_key() 4771 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4772 expression = self._parse_primary_key() 4773 else: 4774 expression = None 4775 4776 return 
self.expression(exp.AddConstraint, this=this, expression=expression) 4777 4778 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4779 index = self._index - 1 4780 4781 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4782 return self._parse_csv(self._parse_add_constraint) 4783 4784 self._retreat(index) 4785 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4786 return self._parse_csv(self._parse_field_def) 4787 4788 return self._parse_csv(self._parse_add_column) 4789 4790 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4791 self._match(TokenType.COLUMN) 4792 column = self._parse_field(any_token=True) 4793 4794 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4795 return self.expression(exp.AlterColumn, this=column, drop=True) 4796 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4797 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4798 4799 self._match_text_seq("SET", "DATA") 4800 return self.expression( 4801 exp.AlterColumn, 4802 this=column, 4803 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4804 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4805 using=self._match(TokenType.USING) and self._parse_conjunction(), 4806 ) 4807 4808 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4809 index = self._index - 1 4810 4811 partition_exists = self._parse_exists() 4812 if self._match(TokenType.PARTITION, advance=False): 4813 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4814 4815 self._retreat(index) 4816 return self._parse_csv(self._parse_drop_column) 4817 4818 def _parse_alter_table_rename(self) -> exp.RenameTable: 4819 self._match_text_seq("TO") 4820 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4821 4822 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4823 start = self._prev 4824 4825 if not self._match(TokenType.TABLE): 4826 return self._parse_as_command(start) 4827 4828 exists = self._parse_exists() 4829 only = self._match_text_seq("ONLY") 4830 this = self._parse_table(schema=True) 4831 4832 if self._next: 4833 self._advance() 4834 4835 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4836 if parser: 4837 actions = ensure_list(parser(self)) 4838 4839 if not self._curr: 4840 return self.expression( 4841 exp.AlterTable, 4842 this=this, 4843 exists=exists, 4844 actions=actions, 4845 only=only, 4846 ) 4847 4848 return self._parse_as_command(start) 4849 4850 def _parse_merge(self) -> exp.Merge: 4851 self._match(TokenType.INTO) 4852 target = self._parse_table() 4853 4854 if target and self._match(TokenType.ALIAS, advance=False): 4855 target.set("alias", self._parse_table_alias()) 4856 4857 self._match(TokenType.USING) 4858 using = self._parse_table() 4859 4860 self._match(TokenType.ON) 4861 on = self._parse_conjunction() 4862 4863 whens = [] 4864 while self._match(TokenType.WHEN): 4865 matched = not self._match(TokenType.NOT) 4866 self._match_text_seq("MATCHED") 4867 source = ( 4868 False 4869 if self._match_text_seq("BY", "TARGET") 4870 else self._match_text_seq("BY", "SOURCE") 4871 ) 4872 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4873 4874 self._match(TokenType.THEN) 4875 4876 if self._match(TokenType.INSERT): 4877 _this = self._parse_star() 4878 if _this: 4879 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4880 else: 4881 then = self.expression( 4882 exp.Insert, 4883 
this=self._parse_value(), 4884 expression=self._match(TokenType.VALUES) and self._parse_value(), 4885 ) 4886 elif self._match(TokenType.UPDATE): 4887 expressions = self._parse_star() 4888 if expressions: 4889 then = self.expression(exp.Update, expressions=expressions) 4890 else: 4891 then = self.expression( 4892 exp.Update, 4893 expressions=self._match(TokenType.SET) 4894 and self._parse_csv(self._parse_equality), 4895 ) 4896 elif self._match(TokenType.DELETE): 4897 then = self.expression(exp.Var, this=self._prev.text) 4898 else: 4899 then = None 4900 4901 whens.append( 4902 self.expression( 4903 exp.When, 4904 matched=matched, 4905 source=source, 4906 condition=condition, 4907 then=then, 4908 ) 4909 ) 4910 4911 return self.expression( 4912 exp.Merge, 4913 this=target, 4914 using=using, 4915 on=on, 4916 expressions=whens, 4917 ) 4918 4919 def _parse_show(self) -> t.Optional[exp.Expression]: 4920 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4921 if parser: 4922 return parser(self) 4923 return self._parse_as_command(self._prev) 4924 4925 def _parse_set_item_assignment( 4926 self, kind: t.Optional[str] = None 4927 ) -> t.Optional[exp.Expression]: 4928 index = self._index 4929 4930 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4931 return self._parse_set_transaction(global_=kind == "GLOBAL") 4932 4933 left = self._parse_primary() or self._parse_id_var() 4934 4935 if not self._match_texts(("=", "TO")): 4936 self._retreat(index) 4937 return None 4938 4939 right = self._parse_statement() or self._parse_id_var() 4940 this = self.expression(exp.EQ, this=left, expression=right) 4941 4942 return self.expression(exp.SetItem, this=this, kind=kind) 4943 4944 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4945 self._match_text_seq("TRANSACTION") 4946 characteristics = self._parse_csv( 4947 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4948 ) 4949 return self.expression( 4950 exp.SetItem, 4951 expressions=characteristics, 4952 kind="TRANSACTION", 4953 **{"global": global_}, # type: ignore 4954 ) 4955 4956 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4957 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4958 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4959 4960 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4961 index = self._index 4962 set_ = self.expression( 4963 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4964 ) 4965 4966 if self._curr: 4967 self._retreat(index) 4968 return self._parse_as_command(self._prev) 4969 4970 return set_ 4971 4972 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4973 for option in options: 4974 if self._match_text_seq(*option.split(" ")): 4975 return exp.var(option) 4976 return None 4977 4978 def _parse_as_command(self, start: Token) -> exp.Command: 4979 while self._curr: 4980 self._advance() 4981 text = self._find_sql(start, self._prev) 4982 size = len(start.text) 4983 return exp.Command(this=text[:size], expression=text[size:]) 4984 4985 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4986 settings = [] 4987 4988 self._match_l_paren() 4989 kind = self._parse_id_var() 4990 4991 if self._match(TokenType.L_PAREN): 4992 while True: 4993 key = self._parse_id_var() 4994 value = self._parse_primary() 4995 4996 if not key and value is None: 4997 break 4998 
settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4999 self._match(TokenType.R_PAREN) 5000 5001 self._match_r_paren() 5002 5003 return self.expression( 5004 exp.DictProperty, 5005 this=this, 5006 kind=kind.this if kind else None, 5007 settings=settings, 5008 ) 5009 5010 def _parse_dict_range(self, this: str) -> exp.DictRange: 5011 self._match_l_paren() 5012 has_min = self._match_text_seq("MIN") 5013 if has_min: 5014 min = self._parse_var() or self._parse_primary() 5015 self._match_text_seq("MAX") 5016 max = self._parse_var() or self._parse_primary() 5017 else: 5018 max = self._parse_var() or self._parse_primary() 5019 min = exp.Literal.number(0) 5020 self._match_r_paren() 5021 return self.expression(exp.DictRange, this=this, min=min, max=max) 5022 5023 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5024 index = self._index 5025 expression = self._parse_column() 5026 if not self._match(TokenType.IN): 5027 self._retreat(index - 1) 5028 return None 5029 iterator = self._parse_column() 5030 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5031 return self.expression( 5032 exp.Comprehension, 5033 this=this, 5034 expression=expression, 5035 iterator=iterator, 5036 condition=condition, 5037 ) 5038 5039 def _find_parser( 5040 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5041 ) -> t.Optional[t.Callable]: 5042 if not self._curr: 5043 return None 5044 5045 index = self._index 5046 this = [] 5047 while True: 5048 # The current token might be multiple words 5049 curr = self._curr.text.upper() 5050 key = curr.split(" ") 5051 this.append(curr) 5052 5053 self._advance() 5054 result, trie = in_trie(trie, key) 5055 if result == TrieResult.FAILED: 5056 break 5057 5058 if result == TrieResult.EXISTS: 5059 subparser = parsers[" ".join(this)] 5060 return subparser 5061 5062 self._retreat(index) 5063 return None 5064 5065 def _match(self, token_type, advance=True, expression=None): 5066 if not self._curr: 5067 return None 5068 5069 if self._curr.token_type == token_type: 5070 if advance: 5071 self._advance() 5072 self._add_comments(expression) 5073 return True 5074 5075 return None 5076 5077 def _match_set(self, types, advance=True): 5078 if not self._curr: 5079 return None 5080 5081 if self._curr.token_type in types: 5082 if advance: 5083 self._advance() 5084 return True 5085 5086 return None 5087 5088 def _match_pair(self, token_type_a, token_type_b, advance=True): 5089 if not self._curr or not self._next: 5090 return None 5091 5092 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5093 if advance: 5094 self._advance(2) 5095 return True 5096 5097 return None 5098 5099 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5100 if not self._match(TokenType.L_PAREN, expression=expression): 5101 self.raise_error("Expecting (") 5102 5103 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5104 if not self._match(TokenType.R_PAREN, expression=expression): 5105 self.raise_error("Expecting )") 5106 5107 def _match_texts(self, texts, advance=True): 5108 if self._curr and self._curr.text.upper() in texts: 5109 if advance: 5110 self._advance() 5111 return True 5112 return False 5113 5114 def _match_text_seq(self, *texts, advance=True): 5115 index = self._index 5116 for text in texts: 5117 if self._curr and self._curr.text.upper() == text: 5118 self._advance() 5119 else: 5120 self._retreat(index) 5121 return False 5122 5123 if not 
advance: 5124 self._retreat(index) 5125 5126 return True 5127 5128 @t.overload 5129 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5130 ... 5131 5132 @t.overload 5133 def _replace_columns_with_dots( 5134 self, this: t.Optional[exp.Expression] 5135 ) -> t.Optional[exp.Expression]: 5136 ... 5137 5138 def _replace_columns_with_dots(self, this): 5139 if isinstance(this, exp.Dot): 5140 exp.replace_children(this, self._replace_columns_with_dots) 5141 elif isinstance(this, exp.Column): 5142 exp.replace_children(this, self._replace_columns_with_dots) 5143 table = this.args.get("table") 5144 this = ( 5145 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5146 ) 5147 5148 return this 5149 5150 def _replace_lambda( 5151 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5152 ) -> t.Optional[exp.Expression]: 5153 if not node: 5154 return node 5155 5156 for column in node.find_all(exp.Column): 5157 if column.parts[0].name in lambda_variables: 5158 dot_or_id = column.to_dot() if column.table else column.this 5159 parent = column.parent 5160 5161 while isinstance(parent, exp.Dot): 5162 if not isinstance(parent.parent, exp.Dot): 5163 parent.replace(dot_or_id) 5164 break 5165 parent = parent.parent 5166 else: 5167 if column is node: 5168 node = dot_or_id 5169 else: 5170 column.replace(dot_or_id) 5171 return node 5172 5173 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5174 return [ 5175 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5176 for value in values 5177 if value 5178 ]
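The parsing methods above are plumbing behind a small public surface. A minimal sketch of driving the pipeline directly, assuming only the imports shown (the query string is an arbitrary example):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT CAST(price AS TEXT) FROM items"
    # The tokenizer produces the token list that Parser.parse consumes;
    # parse returns one expression tree per statement in the input.
    tokens = Tokenizer().tokenize(sql)
    expressions = Parser().parse(tokens, sql)
    print(expressions[0].sql())

In practice, sqlglot.parse_one(sql) wraps this same tokenize-then-parse sequence with dialect handling.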
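A pattern worth noting throughout the section is speculative parsing with explicit backtracking: methods such as _parse_types, _parse_lambda and _parse_set_item_assignment save self._index, try an optional construct, and call self._retreat(index) when it does not materialize. A stripped-down sketch of the idea, with hypothetical names (not sqlglot code):

    class MiniParser:
        def __init__(self, tokens):
            self._tokens = tokens
            self._index = 0

        def _match(self, token):
            # Consume the current token only if it is the expected one.
            if self._index < len(self._tokens) and self._tokens[self._index] == token:
                self._index += 1
                return True
            return False

        def _retreat(self, index):
            # Rewind to a saved position after a failed speculative parse.
            self._index = index

        def parse_empty_parens(self):
            index = self._index
            if self._match("(") and self._match(")"):
                return "()"
            self._retreat(index)  # give the consumed tokens back
            return None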
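_parse_types is also reachable through exp.DataType.build, which parses a type string into the same exp.DataType nodes, including the nested ARRAY/STRUCT handling and the trailing [] loop shown above. Illustrative, assuming default-dialect type syntax:

    from sqlglot import exp

    dt = exp.DataType.build("ARRAY<STRUCT<a INT, b TEXT>>")
    print(dt.this)            # DataType.Type.ARRAY
    print(dt.args["nested"])  # True, set by _parse_types for nested tokens
    print(dt.sql())           # round-trips the type string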
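As the comment in _parse_concat notes, a single-argument CONCAT call is replaced by its argument, which is what lets dialects that reject one-argument CONCAT (e.g. Trino) round-trip. A quick check, assuming CONCAT is routed through _parse_concat in the default dialect:

    import sqlglot

    print(sqlglot.parse_one("SELECT CONCAT(a) FROM t").sql())
    # expected: SELECT a FROM t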
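_parse_string_agg folds the PostgreSQL and BigQuery STRING_AGG layouts, including the WITHIN GROUP form, into exp.GroupConcat, which keeps the call transpilable to dialects that spell it GROUP_CONCAT. Illustrative (exact output may vary by sqlglot version):

    import sqlglot
    from sqlglot import exp

    e = sqlglot.parse_one("SELECT STRING_AGG(name, ', ') FROM t", read="postgres")
    print(e.find(exp.GroupConcat) is not None)  # True
    # Transpiling to MySQL should emit a GROUP_CONCAT call:
    print(sqlglot.transpile(
        "SELECT STRING_AGG(name, ', ') FROM t", read="postgres", write="mysql"
    )[0])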
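_parse_window layers FILTER (...), WITHIN GROUP (...), IGNORE or RESPECT NULLS and finally OVER (...) around the function it is given, so a windowed aggregate parses into an exp.Window whose this is the function call and whose spec comes from _parse_window_spec. For example:

    import sqlglot
    from sqlglot import exp

    e = sqlglot.parse_one(
        "SELECT SUM(x) OVER (PARTITION BY y ORDER BY z "
        "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
    )
    w = e.find(exp.Window)
    print(w.this.sql())          # SUM(x)
    print(w.args["spec"].sql())  # the ROWS BETWEEN ... frame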
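_parse_merge turns each WHEN [NOT] MATCHED [BY SOURCE | BY TARGET] [AND cond] THEN branch into an exp.When stored on the exp.Merge node's expressions. For instance (an illustrative statement):

    import sqlglot
    from sqlglot import exp

    merge = sqlglot.parse_one(
        "MERGE INTO target AS t USING source AS s ON t.id = s.id "
        "WHEN MATCHED THEN UPDATE SET t.v = s.v "
        "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    )
    print(isinstance(merge, exp.Merge))  # True
    print(len(merge.expressions))        # 2, one per WHEN branch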
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMPTZ, 165 TokenType.TIMESTAMPLTZ, 166 TokenType.DATETIME, 167 TokenType.DATETIME64, 168 TokenType.DATE, 169 TokenType.INT4RANGE, 170 TokenType.INT4MULTIRANGE, 171 TokenType.INT8RANGE, 172 TokenType.INT8MULTIRANGE, 173 TokenType.NUMRANGE, 174 TokenType.NUMMULTIRANGE, 175 TokenType.TSRANGE, 176 TokenType.TSMULTIRANGE, 177 TokenType.TSTZRANGE, 178 TokenType.TSTZMULTIRANGE, 179 TokenType.DATERANGE, 180 TokenType.DATEMULTIRANGE, 181 TokenType.DECIMAL, 182 TokenType.BIGDECIMAL, 183 TokenType.UUID, 184 TokenType.GEOGRAPHY, 185 TokenType.GEOMETRY, 186 TokenType.HLLSKETCH, 187 TokenType.HSTORE, 188 TokenType.PSEUDO_TYPE, 189 TokenType.SUPER, 190 TokenType.SERIAL, 191 TokenType.SMALLSERIAL, 192 TokenType.BIGSERIAL, 193 
TokenType.XML, 194 TokenType.YEAR, 195 TokenType.UNIQUEIDENTIFIER, 196 TokenType.USERDEFINED, 197 TokenType.MONEY, 198 TokenType.SMALLMONEY, 199 TokenType.ROWVERSION, 200 TokenType.IMAGE, 201 TokenType.VARIANT, 202 TokenType.OBJECT, 203 TokenType.OBJECT_IDENTIFIER, 204 TokenType.INET, 205 TokenType.IPADDRESS, 206 TokenType.IPPREFIX, 207 TokenType.UNKNOWN, 208 TokenType.NULL, 209 *ENUM_TYPE_TOKENS, 210 *NESTED_TYPE_TOKENS, 211 } 212 213 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 214 TokenType.BIGINT: TokenType.UBIGINT, 215 TokenType.INT: TokenType.UINT, 216 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 217 TokenType.SMALLINT: TokenType.USMALLINT, 218 TokenType.TINYINT: TokenType.UTINYINT, 219 } 220 221 SUBQUERY_PREDICATES = { 222 TokenType.ANY: exp.Any, 223 TokenType.ALL: exp.All, 224 TokenType.EXISTS: exp.Exists, 225 TokenType.SOME: exp.Any, 226 } 227 228 RESERVED_KEYWORDS = { 229 *Tokenizer.SINGLE_TOKENS.values(), 230 TokenType.SELECT, 231 } 232 233 DB_CREATABLES = { 234 TokenType.DATABASE, 235 TokenType.SCHEMA, 236 TokenType.TABLE, 237 TokenType.VIEW, 238 TokenType.DICTIONARY, 239 } 240 241 CREATABLES = { 242 TokenType.COLUMN, 243 TokenType.FUNCTION, 244 TokenType.INDEX, 245 TokenType.PROCEDURE, 246 *DB_CREATABLES, 247 } 248 249 # Tokens that can represent identifiers 250 ID_VAR_TOKENS = { 251 TokenType.VAR, 252 TokenType.ANTI, 253 TokenType.APPLY, 254 TokenType.ASC, 255 TokenType.AUTO_INCREMENT, 256 TokenType.BEGIN, 257 TokenType.CACHE, 258 TokenType.CASE, 259 TokenType.COLLATE, 260 TokenType.COMMAND, 261 TokenType.COMMENT, 262 TokenType.COMMIT, 263 TokenType.CONSTRAINT, 264 TokenType.DEFAULT, 265 TokenType.DELETE, 266 TokenType.DESC, 267 TokenType.DESCRIBE, 268 TokenType.DICTIONARY, 269 TokenType.DIV, 270 TokenType.END, 271 TokenType.EXECUTE, 272 TokenType.ESCAPE, 273 TokenType.FALSE, 274 TokenType.FIRST, 275 TokenType.FILTER, 276 TokenType.FORMAT, 277 TokenType.FULL, 278 TokenType.IS, 279 TokenType.ISNULL, 280 TokenType.INTERVAL, 281 TokenType.KEEP, 282 TokenType.LEFT, 283 TokenType.LOAD, 284 TokenType.MERGE, 285 TokenType.NATURAL, 286 TokenType.NEXT, 287 TokenType.OFFSET, 288 TokenType.ORDINALITY, 289 TokenType.OVERWRITE, 290 TokenType.PARTITION, 291 TokenType.PERCENT, 292 TokenType.PIVOT, 293 TokenType.PRAGMA, 294 TokenType.RANGE, 295 TokenType.REFERENCES, 296 TokenType.RIGHT, 297 TokenType.ROW, 298 TokenType.ROWS, 299 TokenType.SEMI, 300 TokenType.SET, 301 TokenType.SETTINGS, 302 TokenType.SHOW, 303 TokenType.TEMPORARY, 304 TokenType.TOP, 305 TokenType.TRUE, 306 TokenType.UNIQUE, 307 TokenType.UNPIVOT, 308 TokenType.UPDATE, 309 TokenType.VOLATILE, 310 TokenType.WINDOW, 311 *CREATABLES, 312 *SUBQUERY_PREDICATES, 313 *TYPE_TOKENS, 314 *NO_PAREN_FUNCTIONS, 315 } 316 317 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 318 319 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 320 TokenType.APPLY, 321 TokenType.ASOF, 322 TokenType.FULL, 323 TokenType.LEFT, 324 TokenType.LOCK, 325 TokenType.NATURAL, 326 TokenType.OFFSET, 327 TokenType.RIGHT, 328 TokenType.WINDOW, 329 } 330 331 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 332 333 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 334 335 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 336 337 FUNC_TOKENS = { 338 TokenType.COMMAND, 339 TokenType.CURRENT_DATE, 340 TokenType.CURRENT_DATETIME, 341 TokenType.CURRENT_TIMESTAMP, 342 TokenType.CURRENT_TIME, 343 TokenType.CURRENT_USER, 344 TokenType.FILTER, 345 TokenType.FIRST, 346 TokenType.FORMAT, 347 TokenType.GLOB, 348 TokenType.IDENTIFIER, 349 TokenType.INDEX, 350 TokenType.ISNULL, 351 
TokenType.ILIKE, 352 TokenType.INSERT, 353 TokenType.LIKE, 354 TokenType.MERGE, 355 TokenType.OFFSET, 356 TokenType.PRIMARY_KEY, 357 TokenType.RANGE, 358 TokenType.REPLACE, 359 TokenType.RLIKE, 360 TokenType.ROW, 361 TokenType.UNNEST, 362 TokenType.VAR, 363 TokenType.LEFT, 364 TokenType.RIGHT, 365 TokenType.DATE, 366 TokenType.DATETIME, 367 TokenType.TABLE, 368 TokenType.TIMESTAMP, 369 TokenType.TIMESTAMPTZ, 370 TokenType.WINDOW, 371 TokenType.XOR, 372 *TYPE_TOKENS, 373 *SUBQUERY_PREDICATES, 374 } 375 376 CONJUNCTION = { 377 TokenType.AND: exp.And, 378 TokenType.OR: exp.Or, 379 } 380 381 EQUALITY = { 382 TokenType.EQ: exp.EQ, 383 TokenType.NEQ: exp.NEQ, 384 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 385 } 386 387 COMPARISON = { 388 TokenType.GT: exp.GT, 389 TokenType.GTE: exp.GTE, 390 TokenType.LT: exp.LT, 391 TokenType.LTE: exp.LTE, 392 } 393 394 BITWISE = { 395 TokenType.AMP: exp.BitwiseAnd, 396 TokenType.CARET: exp.BitwiseXor, 397 TokenType.PIPE: exp.BitwiseOr, 398 TokenType.DPIPE: exp.DPipe, 399 } 400 401 TERM = { 402 TokenType.DASH: exp.Sub, 403 TokenType.PLUS: exp.Add, 404 TokenType.MOD: exp.Mod, 405 TokenType.COLLATE: exp.Collate, 406 } 407 408 FACTOR = { 409 TokenType.DIV: exp.IntDiv, 410 TokenType.LR_ARROW: exp.Distance, 411 TokenType.SLASH: exp.Div, 412 TokenType.STAR: exp.Mul, 413 } 414 415 TIMES = { 416 TokenType.TIME, 417 TokenType.TIMETZ, 418 } 419 420 TIMESTAMPS = { 421 TokenType.TIMESTAMP, 422 TokenType.TIMESTAMPTZ, 423 TokenType.TIMESTAMPLTZ, 424 *TIMES, 425 } 426 427 SET_OPERATIONS = { 428 TokenType.UNION, 429 TokenType.INTERSECT, 430 TokenType.EXCEPT, 431 } 432 433 JOIN_METHODS = { 434 TokenType.NATURAL, 435 TokenType.ASOF, 436 } 437 438 JOIN_SIDES = { 439 TokenType.LEFT, 440 TokenType.RIGHT, 441 TokenType.FULL, 442 } 443 444 JOIN_KINDS = { 445 TokenType.INNER, 446 TokenType.OUTER, 447 TokenType.CROSS, 448 TokenType.SEMI, 449 TokenType.ANTI, 450 } 451 452 JOIN_HINTS: t.Set[str] = set() 453 454 LAMBDAS = { 455 TokenType.ARROW: lambda self, expressions: self.expression( 456 exp.Lambda, 457 this=self._replace_lambda( 458 self._parse_conjunction(), 459 {node.name for node in expressions}, 460 ), 461 expressions=expressions, 462 ), 463 TokenType.FARROW: lambda self, expressions: self.expression( 464 exp.Kwarg, 465 this=exp.var(expressions[0].name), 466 expression=self._parse_conjunction(), 467 ), 468 } 469 470 COLUMN_OPERATORS = { 471 TokenType.DOT: None, 472 TokenType.DCOLON: lambda self, this, to: self.expression( 473 exp.Cast if self.STRICT_CAST else exp.TryCast, 474 this=this, 475 to=to, 476 ), 477 TokenType.ARROW: lambda self, this, path: self.expression( 478 exp.JSONExtract, 479 this=this, 480 expression=path, 481 ), 482 TokenType.DARROW: lambda self, this, path: self.expression( 483 exp.JSONExtractScalar, 484 this=this, 485 expression=path, 486 ), 487 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 488 exp.JSONBExtract, 489 this=this, 490 expression=path, 491 ), 492 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 493 exp.JSONBExtractScalar, 494 this=this, 495 expression=path, 496 ), 497 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 498 exp.JSONBContains, 499 this=this, 500 expression=key, 501 ), 502 } 503 504 EXPRESSION_PARSERS = { 505 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 506 exp.Column: lambda self: self._parse_column(), 507 exp.Condition: lambda self: self._parse_conjunction(), 508 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 509 exp.Expression: lambda 
self: self._parse_statement(), 510 exp.From: lambda self: self._parse_from(), 511 exp.Group: lambda self: self._parse_group(), 512 exp.Having: lambda self: self._parse_having(), 513 exp.Identifier: lambda self: self._parse_id_var(), 514 exp.Join: lambda self: self._parse_join(), 515 exp.Lambda: lambda self: self._parse_lambda(), 516 exp.Lateral: lambda self: self._parse_lateral(), 517 exp.Limit: lambda self: self._parse_limit(), 518 exp.Offset: lambda self: self._parse_offset(), 519 exp.Order: lambda self: self._parse_order(), 520 exp.Ordered: lambda self: self._parse_ordered(), 521 exp.Properties: lambda self: self._parse_properties(), 522 exp.Qualify: lambda self: self._parse_qualify(), 523 exp.Returning: lambda self: self._parse_returning(), 524 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 525 exp.Table: lambda self: self._parse_table_parts(), 526 exp.TableAlias: lambda self: self._parse_table_alias(), 527 exp.Where: lambda self: self._parse_where(), 528 exp.Window: lambda self: self._parse_named_window(), 529 exp.With: lambda self: self._parse_with(), 530 "JOIN_TYPE": lambda self: self._parse_join_parts(), 531 } 532 533 STATEMENT_PARSERS = { 534 TokenType.ALTER: lambda self: self._parse_alter(), 535 TokenType.BEGIN: lambda self: self._parse_transaction(), 536 TokenType.CACHE: lambda self: self._parse_cache(), 537 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 538 TokenType.COMMENT: lambda self: self._parse_comment(), 539 TokenType.CREATE: lambda self: self._parse_create(), 540 TokenType.DELETE: lambda self: self._parse_delete(), 541 TokenType.DESC: lambda self: self._parse_describe(), 542 TokenType.DESCRIBE: lambda self: self._parse_describe(), 543 TokenType.DROP: lambda self: self._parse_drop(), 544 TokenType.INSERT: lambda self: self._parse_insert(), 545 TokenType.LOAD: lambda self: self._parse_load(), 546 TokenType.MERGE: lambda self: self._parse_merge(), 547 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 548 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 549 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 550 TokenType.SET: lambda self: self._parse_set(), 551 TokenType.UNCACHE: lambda self: self._parse_uncache(), 552 TokenType.UPDATE: lambda self: self._parse_update(), 553 TokenType.USE: lambda self: self.expression( 554 exp.Use, 555 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 556 and exp.var(self._prev.text), 557 this=self._parse_table(schema=False), 558 ), 559 } 560 561 UNARY_PARSERS = { 562 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 563 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 564 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 565 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 566 } 567 568 PRIMARY_PARSERS = { 569 TokenType.STRING: lambda self, token: self.expression( 570 exp.Literal, this=token.text, is_string=True 571 ), 572 TokenType.NUMBER: lambda self, token: self.expression( 573 exp.Literal, this=token.text, is_string=False 574 ), 575 TokenType.STAR: lambda self, _: self.expression( 576 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 577 ), 578 TokenType.NULL: lambda self, _: self.expression(exp.Null), 579 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 580 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, 
this=False), 581 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 582 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 583 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 584 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 585 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 586 exp.National, this=token.text 587 ), 588 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 589 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 590 } 591 592 PLACEHOLDER_PARSERS = { 593 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 594 TokenType.PARAMETER: lambda self: self._parse_parameter(), 595 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 596 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 597 else None, 598 } 599 600 RANGE_PARSERS = { 601 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 602 TokenType.GLOB: binary_range_parser(exp.Glob), 603 TokenType.ILIKE: binary_range_parser(exp.ILike), 604 TokenType.IN: lambda self, this: self._parse_in(this), 605 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 606 TokenType.IS: lambda self, this: self._parse_is(this), 607 TokenType.LIKE: binary_range_parser(exp.Like), 608 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 609 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 610 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 611 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 612 } 613 614 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 615 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 616 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 617 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 618 "CHARACTER SET": lambda self: self._parse_character_set(), 619 "CHECKSUM": lambda self: self._parse_checksum(), 620 "CLUSTER BY": lambda self: self._parse_cluster(), 621 "CLUSTERED": lambda self: self._parse_clustered_by(), 622 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 623 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 624 "COPY": lambda self: self._parse_copy_property(), 625 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 626 "DEFINER": lambda self: self._parse_definer(), 627 "DETERMINISTIC": lambda self: self.expression( 628 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 629 ), 630 "DISTKEY": lambda self: self._parse_distkey(), 631 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 632 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 633 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 634 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 635 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 636 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 637 "FREESPACE": lambda self: self._parse_freespace(), 638 "HEAP": lambda self: self.expression(exp.HeapProperty), 639 "IMMUTABLE": lambda self: self.expression( 640 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 641 ), 642 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 643 
"LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 644 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 645 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 646 "LIKE": lambda self: self._parse_create_like(), 647 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 648 "LOCK": lambda self: self._parse_locking(), 649 "LOCKING": lambda self: self._parse_locking(), 650 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 651 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 652 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 653 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 654 "NO": lambda self: self._parse_no_property(), 655 "ON": lambda self: self._parse_on_property(), 656 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 657 "PARTITION BY": lambda self: self._parse_partitioned_by(), 658 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 659 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 660 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 661 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 662 "RETURNS": lambda self: self._parse_returns(), 663 "ROW": lambda self: self._parse_row(), 664 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 665 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 666 "SETTINGS": lambda self: self.expression( 667 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 668 ), 669 "SORTKEY": lambda self: self._parse_sortkey(), 670 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 671 "STABLE": lambda self: self.expression( 672 exp.StabilityProperty, this=exp.Literal.string("STABLE") 673 ), 674 "STORED": lambda self: self._parse_stored(), 675 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 676 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 677 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 678 "TO": lambda self: self._parse_to_table(), 679 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 680 "TTL": lambda self: self._parse_ttl(), 681 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 682 "VOLATILE": lambda self: self._parse_volatile_property(), 683 "WITH": lambda self: self._parse_with_property(), 684 } 685 686 CONSTRAINT_PARSERS = { 687 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 688 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 689 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 690 "CHARACTER SET": lambda self: self.expression( 691 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 692 ), 693 "CHECK": lambda self: self.expression( 694 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 695 ), 696 "COLLATE": lambda self: self.expression( 697 exp.CollateColumnConstraint, this=self._parse_var() 698 ), 699 "COMMENT": lambda self: self.expression( 700 exp.CommentColumnConstraint, this=self._parse_string() 701 ), 702 "COMPRESS": lambda self: self._parse_compress(), 703 "CLUSTERED": lambda self: self.expression( 704 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 705 ), 706 "NONCLUSTERED": lambda self: self.expression( 707 exp.NonClusteredColumnConstraint, 
this=self._parse_wrapped_csv(self._parse_ordered) 708 ), 709 "DEFAULT": lambda self: self.expression( 710 exp.DefaultColumnConstraint, this=self._parse_bitwise() 711 ), 712 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 713 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 714 "FORMAT": lambda self: self.expression( 715 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 716 ), 717 "GENERATED": lambda self: self._parse_generated_as_identity(), 718 "IDENTITY": lambda self: self._parse_auto_increment(), 719 "INLINE": lambda self: self._parse_inline(), 720 "LIKE": lambda self: self._parse_create_like(), 721 "NOT": lambda self: self._parse_not_constraint(), 722 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 723 "ON": lambda self: ( 724 self._match(TokenType.UPDATE) 725 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 726 ) 727 or self.expression(exp.OnProperty, this=self._parse_id_var()), 728 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 729 "PRIMARY KEY": lambda self: self._parse_primary_key(), 730 "REFERENCES": lambda self: self._parse_references(match=False), 731 "TITLE": lambda self: self.expression( 732 exp.TitleColumnConstraint, this=self._parse_var_or_string() 733 ), 734 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 735 "UNIQUE": lambda self: self._parse_unique(), 736 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 737 "WITH": lambda self: self.expression( 738 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 739 ), 740 } 741 742 ALTER_PARSERS = { 743 "ADD": lambda self: self._parse_alter_table_add(), 744 "ALTER": lambda self: self._parse_alter_table_alter(), 745 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 746 "DROP": lambda self: self._parse_alter_table_drop(), 747 "RENAME": lambda self: self._parse_alter_table_rename(), 748 } 749 750 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 751 752 NO_PAREN_FUNCTION_PARSERS = { 753 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 754 "CASE": lambda self: self._parse_case(), 755 "IF": lambda self: self._parse_if(), 756 "NEXT": lambda self: self._parse_next_value_for(), 757 } 758 759 INVALID_FUNC_NAME_TOKENS = { 760 TokenType.IDENTIFIER, 761 TokenType.STRING, 762 } 763 764 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 765 766 FUNCTION_PARSERS = { 767 "ANY_VALUE": lambda self: self._parse_any_value(), 768 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 769 "CONCAT": lambda self: self._parse_concat(), 770 "CONCAT_WS": lambda self: self._parse_concat_ws(), 771 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 772 "DECODE": lambda self: self._parse_decode(), 773 "EXTRACT": lambda self: self._parse_extract(), 774 "JSON_OBJECT": lambda self: self._parse_json_object(), 775 "LOG": lambda self: self._parse_logarithm(), 776 "MATCH": lambda self: self._parse_match_against(), 777 "OPENJSON": lambda self: self._parse_open_json(), 778 "POSITION": lambda self: self._parse_position(), 779 "SAFE_CAST": lambda self: self._parse_cast(False), 780 "STRING_AGG": lambda self: self._parse_string_agg(), 781 "SUBSTRING": lambda self: self._parse_substring(), 782 "TRIM": lambda self: self._parse_trim(), 783 "TRY_CAST": lambda self: self._parse_cast(False), 784 "TRY_CONVERT": lambda self: 
self._parse_convert(False), 785 } 786 787 QUERY_MODIFIER_PARSERS = { 788 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 789 TokenType.WHERE: lambda self: ("where", self._parse_where()), 790 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 791 TokenType.HAVING: lambda self: ("having", self._parse_having()), 792 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 793 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 794 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 795 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 796 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 797 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 798 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 799 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 800 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 801 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 802 TokenType.CLUSTER_BY: lambda self: ( 803 "cluster", 804 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 805 ), 806 TokenType.DISTRIBUTE_BY: lambda self: ( 807 "distribute", 808 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 809 ), 810 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 811 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 812 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 813 } 814 815 SET_PARSERS = { 816 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 817 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 818 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 819 "TRANSACTION": lambda self: self._parse_set_transaction(), 820 } 821 822 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 823 824 TYPE_LITERAL_PARSERS = { 825 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 826 } 827 828 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 829 830 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 831 832 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 833 834 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 835 TRANSACTION_CHARACTERISTICS = { 836 "ISOLATION LEVEL REPEATABLE READ", 837 "ISOLATION LEVEL READ COMMITTED", 838 "ISOLATION LEVEL READ UNCOMMITTED", 839 "ISOLATION LEVEL SERIALIZABLE", 840 "READ WRITE", 841 "READ ONLY", 842 } 843 844 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 845 846 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 847 848 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 849 850 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 851 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 852 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 853 854 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 855 856 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 857 858 DISTINCT_TOKENS = {TokenType.DISTINCT} 859 860 STRICT_CAST = True 861 862 # A NULL arg in CONCAT yields NULL by default 863 CONCAT_NULL_OUTPUTS_STRING = False 864 865 PREFIXED_PIVOT_COLUMNS = False 866 IDENTIFY_PIVOT_STRINGS = False 867 868 LOG_BASE_FIRST = True 869 LOG_DEFAULTS_TO_LN = False 870 871 # Whether or not ADD is present for 
each column added by ALTER TABLE 872 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 873 874 # Whether or not the table sample clause expects CSV syntax 875 TABLESAMPLE_CSV = False 876 877 __slots__ = ( 878 "error_level", 879 "error_message_context", 880 "max_errors", 881 "sql", 882 "errors", 883 "_tokens", 884 "_index", 885 "_curr", 886 "_next", 887 "_prev", 888 "_prev_comments", 889 "_tokenizer", 890 ) 891 892 # Autofilled 893 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 894 INDEX_OFFSET: int = 0 895 UNNEST_COLUMN_ONLY: bool = False 896 ALIAS_POST_TABLESAMPLE: bool = False 897 STRICT_STRING_CONCAT = False 898 SUPPORTS_USER_DEFINED_TYPES = True 899 NORMALIZE_FUNCTIONS = "upper" 900 NULL_ORDERING: str = "nulls_are_small" 901 SHOW_TRIE: t.Dict = {} 902 SET_TRIE: t.Dict = {} 903 FORMAT_MAPPING: t.Dict[str, str] = {} 904 FORMAT_TRIE: t.Dict = {} 905 TIME_MAPPING: t.Dict[str, str] = {} 906 TIME_TRIE: t.Dict = {} 907 908 def __init__( 909 self, 910 error_level: t.Optional[ErrorLevel] = None, 911 error_message_context: int = 100, 912 max_errors: int = 3, 913 ): 914 self.error_level = error_level or ErrorLevel.IMMEDIATE 915 self.error_message_context = error_message_context 916 self.max_errors = max_errors 917 self._tokenizer = self.TOKENIZER_CLASS() 918 self.reset() 919 920 def reset(self): 921 self.sql = "" 922 self.errors = [] 923 self._tokens = [] 924 self._index = 0 925 self._curr = None 926 self._next = None 927 self._prev = None 928 self._prev_comments = None 929 930 def parse( 931 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 932 ) -> t.List[t.Optional[exp.Expression]]: 933 """ 934 Parses a list of tokens and returns a list of syntax trees, one tree 935 per parsed SQL statement. 936 937 Args: 938 raw_tokens: The list of tokens. 939 sql: The original SQL string, used to produce helpful debug messages. 940 941 Returns: 942 The list of the produced syntax trees. 943 """ 944 return self._parse( 945 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 946 ) 947 948 def parse_into( 949 self, 950 expression_types: exp.IntoType, 951 raw_tokens: t.List[Token], 952 sql: t.Optional[str] = None, 953 ) -> t.List[t.Optional[exp.Expression]]: 954 """ 955 Parses a list of tokens into a given Expression type. If a collection of Expression 956 types is given instead, this method will try to parse the token list into each one 957 of them, stopping at the first for which the parsing succeeds. 958 959 Args: 960 expression_types: The expression type(s) to try and parse the token list into. 961 raw_tokens: The list of tokens. 962 sql: The original SQL string, used to produce helpful debug messages. 963 964 Returns: 965 The target Expression. 
966 """ 967 errors = [] 968 for expression_type in ensure_list(expression_types): 969 parser = self.EXPRESSION_PARSERS.get(expression_type) 970 if not parser: 971 raise TypeError(f"No parser registered for {expression_type}") 972 973 try: 974 return self._parse(parser, raw_tokens, sql) 975 except ParseError as e: 976 e.errors[0]["into_expression"] = expression_type 977 errors.append(e) 978 979 raise ParseError( 980 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 981 errors=merge_errors(errors), 982 ) from errors[-1] 983 984 def _parse( 985 self, 986 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 987 raw_tokens: t.List[Token], 988 sql: t.Optional[str] = None, 989 ) -> t.List[t.Optional[exp.Expression]]: 990 self.reset() 991 self.sql = sql or "" 992 993 total = len(raw_tokens) 994 chunks: t.List[t.List[Token]] = [[]] 995 996 for i, token in enumerate(raw_tokens): 997 if token.token_type == TokenType.SEMICOLON: 998 if i < total - 1: 999 chunks.append([]) 1000 else: 1001 chunks[-1].append(token) 1002 1003 expressions = [] 1004 1005 for tokens in chunks: 1006 self._index = -1 1007 self._tokens = tokens 1008 self._advance() 1009 1010 expressions.append(parse_method(self)) 1011 1012 if self._index < len(self._tokens): 1013 self.raise_error("Invalid expression / Unexpected token") 1014 1015 self.check_errors() 1016 1017 return expressions 1018 1019 def check_errors(self) -> None: 1020 """Logs or raises any found errors, depending on the chosen error level setting.""" 1021 if self.error_level == ErrorLevel.WARN: 1022 for error in self.errors: 1023 logger.error(str(error)) 1024 elif self.error_level == ErrorLevel.RAISE and self.errors: 1025 raise ParseError( 1026 concat_messages(self.errors, self.max_errors), 1027 errors=merge_errors(self.errors), 1028 ) 1029 1030 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1031 """ 1032 Appends an error in the list of recorded errors or raises it, depending on the chosen 1033 error level setting. 1034 """ 1035 token = token or self._curr or self._prev or Token.string("") 1036 start = token.start 1037 end = token.end + 1 1038 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1039 highlight = self.sql[start:end] 1040 end_context = self.sql[end : end + self.error_message_context] 1041 1042 error = ParseError.new( 1043 f"{message}. Line {token.line}, Col: {token.col}.\n" 1044 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1045 description=message, 1046 line=token.line, 1047 col=token.col, 1048 start_context=start_context, 1049 highlight=highlight, 1050 end_context=end_context, 1051 ) 1052 1053 if self.error_level == ErrorLevel.IMMEDIATE: 1054 raise error 1055 1056 self.errors.append(error) 1057 1058 def expression( 1059 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1060 ) -> E: 1061 """ 1062 Creates a new, validated Expression. 1063 1064 Args: 1065 exp_class: The expression class to instantiate. 1066 comments: An optional list of comments to attach to the expression. 1067 kwargs: The arguments to set for the expression along with their respective values. 1068 1069 Returns: 1070 The target expression. 
1071 """ 1072 instance = exp_class(**kwargs) 1073 instance.add_comments(comments) if comments else self._add_comments(instance) 1074 return self.validate_expression(instance) 1075 1076 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1077 if expression and self._prev_comments: 1078 expression.add_comments(self._prev_comments) 1079 self._prev_comments = None 1080 1081 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1082 """ 1083 Validates an Expression, making sure that all its mandatory arguments are set. 1084 1085 Args: 1086 expression: The expression to validate. 1087 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1088 1089 Returns: 1090 The validated expression. 1091 """ 1092 if self.error_level != ErrorLevel.IGNORE: 1093 for error_message in expression.error_messages(args): 1094 self.raise_error(error_message) 1095 1096 return expression 1097 1098 def _find_sql(self, start: Token, end: Token) -> str: 1099 return self.sql[start.start : end.end + 1] 1100 1101 def _advance(self, times: int = 1) -> None: 1102 self._index += times 1103 self._curr = seq_get(self._tokens, self._index) 1104 self._next = seq_get(self._tokens, self._index + 1) 1105 1106 if self._index > 0: 1107 self._prev = self._tokens[self._index - 1] 1108 self._prev_comments = self._prev.comments 1109 else: 1110 self._prev = None 1111 self._prev_comments = None 1112 1113 def _retreat(self, index: int) -> None: 1114 if index != self._index: 1115 self._advance(index - self._index) 1116 1117 def _parse_command(self) -> exp.Command: 1118 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1119 1120 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1121 start = self._prev 1122 exists = self._parse_exists() if allow_exists else None 1123 1124 self._match(TokenType.ON) 1125 1126 kind = self._match_set(self.CREATABLES) and self._prev 1127 if not kind: 1128 return self._parse_as_command(start) 1129 1130 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1131 this = self._parse_user_defined_function(kind=kind.token_type) 1132 elif kind.token_type == TokenType.TABLE: 1133 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1134 elif kind.token_type == TokenType.COLUMN: 1135 this = self._parse_column() 1136 else: 1137 this = self._parse_id_var() 1138 1139 self._match(TokenType.IS) 1140 1141 return self.expression( 1142 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1143 ) 1144 1145 def _parse_to_table( 1146 self, 1147 ) -> exp.ToTableProperty: 1148 table = self._parse_table_parts(schema=True) 1149 return self.expression(exp.ToTableProperty, this=table) 1150 1151 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1152 def _parse_ttl(self) -> exp.Expression: 1153 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1154 this = self._parse_bitwise() 1155 1156 if self._match_text_seq("DELETE"): 1157 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1158 if self._match_text_seq("RECOMPRESS"): 1159 return self.expression( 1160 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1161 ) 1162 if self._match_text_seq("TO", "DISK"): 1163 return self.expression( 1164 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1165 ) 1166 if self._match_text_seq("TO", "VOLUME"): 1167 return self.expression( 1168 
exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1169 ) 1170 1171 return this 1172 1173 expressions = self._parse_csv(_parse_ttl_action) 1174 where = self._parse_where() 1175 group = self._parse_group() 1176 1177 aggregates = None 1178 if group and self._match(TokenType.SET): 1179 aggregates = self._parse_csv(self._parse_set_item) 1180 1181 return self.expression( 1182 exp.MergeTreeTTL, 1183 expressions=expressions, 1184 where=where, 1185 group=group, 1186 aggregates=aggregates, 1187 ) 1188 1189 def _parse_statement(self) -> t.Optional[exp.Expression]: 1190 if self._curr is None: 1191 return None 1192 1193 if self._match_set(self.STATEMENT_PARSERS): 1194 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1195 1196 if self._match_set(Tokenizer.COMMANDS): 1197 return self._parse_command() 1198 1199 expression = self._parse_expression() 1200 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1201 return self._parse_query_modifiers(expression) 1202 1203 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1204 start = self._prev 1205 temporary = self._match(TokenType.TEMPORARY) 1206 materialized = self._match_text_seq("MATERIALIZED") 1207 1208 kind = self._match_set(self.CREATABLES) and self._prev.text 1209 if not kind: 1210 return self._parse_as_command(start) 1211 1212 return self.expression( 1213 exp.Drop, 1214 comments=start.comments, 1215 exists=exists or self._parse_exists(), 1216 this=self._parse_table(schema=True), 1217 kind=kind, 1218 temporary=temporary, 1219 materialized=materialized, 1220 cascade=self._match_text_seq("CASCADE"), 1221 constraints=self._match_text_seq("CONSTRAINTS"), 1222 purge=self._match_text_seq("PURGE"), 1223 ) 1224 1225 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1226 return ( 1227 self._match_text_seq("IF") 1228 and (not not_ or self._match(TokenType.NOT)) 1229 and self._match(TokenType.EXISTS) 1230 ) 1231 1232 def _parse_create(self) -> exp.Create | exp.Command: 1233 # Note: this can't be None because we've matched a statement parser 1234 start = self._prev 1235 comments = self._prev_comments 1236 1237 replace = start.text.upper() == "REPLACE" or self._match_pair( 1238 TokenType.OR, TokenType.REPLACE 1239 ) 1240 unique = self._match(TokenType.UNIQUE) 1241 1242 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1243 self._advance() 1244 1245 properties = None 1246 create_token = self._match_set(self.CREATABLES) and self._prev 1247 1248 if not create_token: 1249 # exp.Properties.Location.POST_CREATE 1250 properties = self._parse_properties() 1251 create_token = self._match_set(self.CREATABLES) and self._prev 1252 1253 if not properties or not create_token: 1254 return self._parse_as_command(start) 1255 1256 exists = self._parse_exists(not_=True) 1257 this = None 1258 expression: t.Optional[exp.Expression] = None 1259 indexes = None 1260 no_schema_binding = None 1261 begin = None 1262 clone = None 1263 1264 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1265 nonlocal properties 1266 if properties and temp_props: 1267 properties.expressions.extend(temp_props.expressions) 1268 elif temp_props: 1269 properties = temp_props 1270 1271 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1272 this = self._parse_user_defined_function(kind=create_token.token_type) 1273 1274 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1275 extend_props(self._parse_properties()) 1276 1277 
self._match(TokenType.ALIAS) 1278 1279 if self._match(TokenType.COMMAND): 1280 expression = self._parse_as_command(self._prev) 1281 else: 1282 begin = self._match(TokenType.BEGIN) 1283 return_ = self._match_text_seq("RETURN") 1284 expression = self._parse_statement() 1285 1286 if return_: 1287 expression = self.expression(exp.Return, this=expression) 1288 elif create_token.token_type == TokenType.INDEX: 1289 this = self._parse_index(index=self._parse_id_var()) 1290 elif create_token.token_type in self.DB_CREATABLES: 1291 table_parts = self._parse_table_parts(schema=True) 1292 1293 # exp.Properties.Location.POST_NAME 1294 self._match(TokenType.COMMA) 1295 extend_props(self._parse_properties(before=True)) 1296 1297 this = self._parse_schema(this=table_parts) 1298 1299 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1300 extend_props(self._parse_properties()) 1301 1302 self._match(TokenType.ALIAS) 1303 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1304 # exp.Properties.Location.POST_ALIAS 1305 extend_props(self._parse_properties()) 1306 1307 expression = self._parse_ddl_select() 1308 1309 if create_token.token_type == TokenType.TABLE: 1310 # exp.Properties.Location.POST_EXPRESSION 1311 extend_props(self._parse_properties()) 1312 1313 indexes = [] 1314 while True: 1315 index = self._parse_index() 1316 1317 # exp.Properties.Location.POST_INDEX 1318 extend_props(self._parse_properties()) 1319 1320 if not index: 1321 break 1322 else: 1323 self._match(TokenType.COMMA) 1324 indexes.append(index) 1325 elif create_token.token_type == TokenType.VIEW: 1326 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1327 no_schema_binding = True 1328 1329 shallow = self._match_text_seq("SHALLOW") 1330 1331 if self._match_text_seq("CLONE"): 1332 clone = self._parse_table(schema=True) 1333 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1334 clone_kind = ( 1335 self._match(TokenType.L_PAREN) 1336 and self._match_texts(self.CLONE_KINDS) 1337 and self._prev.text.upper() 1338 ) 1339 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1340 self._match(TokenType.R_PAREN) 1341 clone = self.expression( 1342 exp.Clone, 1343 this=clone, 1344 when=when, 1345 kind=clone_kind, 1346 shallow=shallow, 1347 expression=clone_expression, 1348 ) 1349 1350 return self.expression( 1351 exp.Create, 1352 comments=comments, 1353 this=this, 1354 kind=create_token.text, 1355 replace=replace, 1356 unique=unique, 1357 expression=expression, 1358 exists=exists, 1359 properties=properties, 1360 indexes=indexes, 1361 no_schema_binding=no_schema_binding, 1362 begin=begin, 1363 clone=clone, 1364 ) 1365 1366 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1367 # only used for teradata currently 1368 self._match(TokenType.COMMA) 1369 1370 kwargs = { 1371 "no": self._match_text_seq("NO"), 1372 "dual": self._match_text_seq("DUAL"), 1373 "before": self._match_text_seq("BEFORE"), 1374 "default": self._match_text_seq("DEFAULT"), 1375 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1376 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1377 "after": self._match_text_seq("AFTER"), 1378 "minimum": self._match_texts(("MIN", "MINIMUM")), 1379 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1380 } 1381 1382 if self._match_texts(self.PROPERTY_PARSERS): 1383 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1384 try: 1385 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1386 except TypeError: 1387 self.raise_error(f"Cannot parse 
property '{self._prev.text}'") 1388 1389 return None 1390 1391 def _parse_property(self) -> t.Optional[exp.Expression]: 1392 if self._match_texts(self.PROPERTY_PARSERS): 1393 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1394 1395 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1396 return self._parse_character_set(default=True) 1397 1398 if self._match_text_seq("COMPOUND", "SORTKEY"): 1399 return self._parse_sortkey(compound=True) 1400 1401 if self._match_text_seq("SQL", "SECURITY"): 1402 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1403 1404 assignment = self._match_pair( 1405 TokenType.VAR, TokenType.EQ, advance=False 1406 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1407 1408 if assignment: 1409 key = self._parse_var_or_string() 1410 self._match(TokenType.EQ) 1411 return self.expression( 1412 exp.Property, 1413 this=key, 1414 value=self._parse_column() or self._parse_var(any_token=True), 1415 ) 1416 1417 return None 1418 1419 def _parse_stored(self) -> exp.FileFormatProperty: 1420 self._match(TokenType.ALIAS) 1421 1422 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1423 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1424 1425 return self.expression( 1426 exp.FileFormatProperty, 1427 this=self.expression( 1428 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1429 ) 1430 if input_format or output_format 1431 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1432 ) 1433 1434 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1435 self._match(TokenType.EQ) 1436 self._match(TokenType.ALIAS) 1437 return self.expression(exp_class, this=self._parse_field()) 1438 1439 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1440 properties = [] 1441 while True: 1442 if before: 1443 prop = self._parse_property_before() 1444 else: 1445 prop = self._parse_property() 1446 1447 if not prop: 1448 break 1449 for p in ensure_list(prop): 1450 properties.append(p) 1451 1452 if properties: 1453 return self.expression(exp.Properties, expressions=properties) 1454 1455 return None 1456 1457 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1458 return self.expression( 1459 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1460 ) 1461 1462 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1463 if self._index >= 2: 1464 pre_volatile_token = self._tokens[self._index - 2] 1465 else: 1466 pre_volatile_token = None 1467 1468 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1469 return exp.VolatileProperty() 1470 1471 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1472 1473 def _parse_with_property( 1474 self, 1475 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1476 if self._match(TokenType.L_PAREN, advance=False): 1477 return self._parse_wrapped_csv(self._parse_property) 1478 1479 if self._match_text_seq("JOURNAL"): 1480 return self._parse_withjournaltable() 1481 1482 if self._match_text_seq("DATA"): 1483 return self._parse_withdata(no=False) 1484 elif self._match_text_seq("NO", "DATA"): 1485 return self._parse_withdata(no=True) 1486 1487 if not self._next: 1488 return None 1489 1490 return self._parse_withisolatedloading() 1491 1492 # 
https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1493 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1494 self._match(TokenType.EQ) 1495 1496 user = self._parse_id_var() 1497 self._match(TokenType.PARAMETER) 1498 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1499 1500 if not user or not host: 1501 return None 1502 1503 return exp.DefinerProperty(this=f"{user}@{host}") 1504 1505 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1506 self._match(TokenType.TABLE) 1507 self._match(TokenType.EQ) 1508 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1509 1510 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1511 return self.expression(exp.LogProperty, no=no) 1512 1513 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1514 return self.expression(exp.JournalProperty, **kwargs) 1515 1516 def _parse_checksum(self) -> exp.ChecksumProperty: 1517 self._match(TokenType.EQ) 1518 1519 on = None 1520 if self._match(TokenType.ON): 1521 on = True 1522 elif self._match_text_seq("OFF"): 1523 on = False 1524 1525 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1526 1527 def _parse_cluster(self) -> exp.Cluster: 1528 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1529 1530 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1531 self._match_text_seq("BY") 1532 1533 self._match_l_paren() 1534 expressions = self._parse_csv(self._parse_column) 1535 self._match_r_paren() 1536 1537 if self._match_text_seq("SORTED", "BY"): 1538 self._match_l_paren() 1539 sorted_by = self._parse_csv(self._parse_ordered) 1540 self._match_r_paren() 1541 else: 1542 sorted_by = None 1543 1544 self._match(TokenType.INTO) 1545 buckets = self._parse_number() 1546 self._match_text_seq("BUCKETS") 1547 1548 return self.expression( 1549 exp.ClusteredByProperty, 1550 expressions=expressions, 1551 sorted_by=sorted_by, 1552 buckets=buckets, 1553 ) 1554 1555 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1556 if not self._match_text_seq("GRANTS"): 1557 self._retreat(self._index - 1) 1558 return None 1559 1560 return self.expression(exp.CopyGrantsProperty) 1561 1562 def _parse_freespace(self) -> exp.FreespaceProperty: 1563 self._match(TokenType.EQ) 1564 return self.expression( 1565 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1566 ) 1567 1568 def _parse_mergeblockratio( 1569 self, no: bool = False, default: bool = False 1570 ) -> exp.MergeBlockRatioProperty: 1571 if self._match(TokenType.EQ): 1572 return self.expression( 1573 exp.MergeBlockRatioProperty, 1574 this=self._parse_number(), 1575 percent=self._match(TokenType.PERCENT), 1576 ) 1577 1578 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1579 1580 def _parse_datablocksize( 1581 self, 1582 default: t.Optional[bool] = None, 1583 minimum: t.Optional[bool] = None, 1584 maximum: t.Optional[bool] = None, 1585 ) -> exp.DataBlocksizeProperty: 1586 self._match(TokenType.EQ) 1587 size = self._parse_number() 1588 1589 units = None 1590 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1591 units = self._prev.text 1592 1593 return self.expression( 1594 exp.DataBlocksizeProperty, 1595 size=size, 1596 units=units, 1597 default=default, 1598 minimum=minimum, 1599 maximum=maximum, 1600 ) 1601 1602 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1603 self._match(TokenType.EQ) 1604 
always = self._match_text_seq("ALWAYS") 1605 manual = self._match_text_seq("MANUAL") 1606 never = self._match_text_seq("NEVER") 1607 default = self._match_text_seq("DEFAULT") 1608 1609 autotemp = None 1610 if self._match_text_seq("AUTOTEMP"): 1611 autotemp = self._parse_schema() 1612 1613 return self.expression( 1614 exp.BlockCompressionProperty, 1615 always=always, 1616 manual=manual, 1617 never=never, 1618 default=default, 1619 autotemp=autotemp, 1620 ) 1621 1622 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1623 no = self._match_text_seq("NO") 1624 concurrent = self._match_text_seq("CONCURRENT") 1625 self._match_text_seq("ISOLATED", "LOADING") 1626 for_all = self._match_text_seq("FOR", "ALL") 1627 for_insert = self._match_text_seq("FOR", "INSERT") 1628 for_none = self._match_text_seq("FOR", "NONE") 1629 return self.expression( 1630 exp.IsolatedLoadingProperty, 1631 no=no, 1632 concurrent=concurrent, 1633 for_all=for_all, 1634 for_insert=for_insert, 1635 for_none=for_none, 1636 ) 1637 1638 def _parse_locking(self) -> exp.LockingProperty: 1639 if self._match(TokenType.TABLE): 1640 kind = "TABLE" 1641 elif self._match(TokenType.VIEW): 1642 kind = "VIEW" 1643 elif self._match(TokenType.ROW): 1644 kind = "ROW" 1645 elif self._match_text_seq("DATABASE"): 1646 kind = "DATABASE" 1647 else: 1648 kind = None 1649 1650 if kind in ("DATABASE", "TABLE", "VIEW"): 1651 this = self._parse_table_parts() 1652 else: 1653 this = None 1654 1655 if self._match(TokenType.FOR): 1656 for_or_in = "FOR" 1657 elif self._match(TokenType.IN): 1658 for_or_in = "IN" 1659 else: 1660 for_or_in = None 1661 1662 if self._match_text_seq("ACCESS"): 1663 lock_type = "ACCESS" 1664 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1665 lock_type = "EXCLUSIVE" 1666 elif self._match_text_seq("SHARE"): 1667 lock_type = "SHARE" 1668 elif self._match_text_seq("READ"): 1669 lock_type = "READ" 1670 elif self._match_text_seq("WRITE"): 1671 lock_type = "WRITE" 1672 elif self._match_text_seq("CHECKSUM"): 1673 lock_type = "CHECKSUM" 1674 else: 1675 lock_type = None 1676 1677 override = self._match_text_seq("OVERRIDE") 1678 1679 return self.expression( 1680 exp.LockingProperty, 1681 this=this, 1682 kind=kind, 1683 for_or_in=for_or_in, 1684 lock_type=lock_type, 1685 override=override, 1686 ) 1687 1688 def _parse_partition_by(self) -> t.List[exp.Expression]: 1689 if self._match(TokenType.PARTITION_BY): 1690 return self._parse_csv(self._parse_conjunction) 1691 return [] 1692 1693 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1694 self._match(TokenType.EQ) 1695 return self.expression( 1696 exp.PartitionedByProperty, 1697 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1698 ) 1699 1700 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1701 if self._match_text_seq("AND", "STATISTICS"): 1702 statistics = True 1703 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1704 statistics = False 1705 else: 1706 statistics = None 1707 1708 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1709 1710 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1711 if self._match_text_seq("PRIMARY", "INDEX"): 1712 return exp.NoPrimaryIndexProperty() 1713 return None 1714 1715 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1716 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1717 return exp.OnCommitProperty() 1718 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1719 return exp.OnCommitProperty(delete=True) 1720 return 
self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1721 1722 def _parse_distkey(self) -> exp.DistKeyProperty: 1723 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1724 1725 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1726 table = self._parse_table(schema=True) 1727 1728 options = [] 1729 while self._match_texts(("INCLUDING", "EXCLUDING")): 1730 this = self._prev.text.upper() 1731 1732 id_var = self._parse_id_var() 1733 if not id_var: 1734 return None 1735 1736 options.append( 1737 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1738 ) 1739 1740 return self.expression(exp.LikeProperty, this=table, expressions=options) 1741 1742 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1743 return self.expression( 1744 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1745 ) 1746 1747 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1748 self._match(TokenType.EQ) 1749 return self.expression( 1750 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1751 ) 1752 1753 def _parse_returns(self) -> exp.ReturnsProperty: 1754 value: t.Optional[exp.Expression] 1755 is_table = self._match(TokenType.TABLE) 1756 1757 if is_table: 1758 if self._match(TokenType.LT): 1759 value = self.expression( 1760 exp.Schema, 1761 this="TABLE", 1762 expressions=self._parse_csv(self._parse_struct_types), 1763 ) 1764 if not self._match(TokenType.GT): 1765 self.raise_error("Expecting >") 1766 else: 1767 value = self._parse_schema(exp.var("TABLE")) 1768 else: 1769 value = self._parse_types() 1770 1771 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1772 1773 def _parse_describe(self) -> exp.Describe: 1774 kind = self._match_set(self.CREATABLES) and self._prev.text 1775 this = self._parse_table(schema=True) 1776 properties = self._parse_properties() 1777 expressions = properties.expressions if properties else None 1778 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1779 1780 def _parse_insert(self) -> exp.Insert: 1781 comments = ensure_list(self._prev_comments) 1782 overwrite = self._match(TokenType.OVERWRITE) 1783 ignore = self._match(TokenType.IGNORE) 1784 local = self._match_text_seq("LOCAL") 1785 alternative = None 1786 1787 if self._match_text_seq("DIRECTORY"): 1788 this: t.Optional[exp.Expression] = self.expression( 1789 exp.Directory, 1790 this=self._parse_var_or_string(), 1791 local=local, 1792 row_format=self._parse_row_format(match_row=True), 1793 ) 1794 else: 1795 if self._match(TokenType.OR): 1796 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1797 1798 self._match(TokenType.INTO) 1799 comments += ensure_list(self._prev_comments) 1800 self._match(TokenType.TABLE) 1801 this = self._parse_table(schema=True) 1802 1803 returning = self._parse_returning() 1804 1805 return self.expression( 1806 exp.Insert, 1807 comments=comments, 1808 this=this, 1809 by_name=self._match_text_seq("BY", "NAME"), 1810 exists=self._parse_exists(), 1811 partition=self._parse_partition(), 1812 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1813 and self._parse_conjunction(), 1814 expression=self._parse_ddl_select(), 1815 conflict=self._parse_on_conflict(), 1816 returning=returning or self._parse_returning(), 1817 overwrite=overwrite, 1818 alternative=alternative, 1819 ignore=ignore, 1820 ) 1821 1822 def 
_parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1823 conflict = self._match_text_seq("ON", "CONFLICT") 1824 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1825 1826 if not conflict and not duplicate: 1827 return None 1828 1829 nothing = None 1830 expressions = None 1831 key = None 1832 constraint = None 1833 1834 if conflict: 1835 if self._match_text_seq("ON", "CONSTRAINT"): 1836 constraint = self._parse_id_var() 1837 else: 1838 key = self._parse_csv(self._parse_value) 1839 1840 self._match_text_seq("DO") 1841 if self._match_text_seq("NOTHING"): 1842 nothing = True 1843 else: 1844 self._match(TokenType.UPDATE) 1845 self._match(TokenType.SET) 1846 expressions = self._parse_csv(self._parse_equality) 1847 1848 return self.expression( 1849 exp.OnConflict, 1850 duplicate=duplicate, 1851 expressions=expressions, 1852 nothing=nothing, 1853 key=key, 1854 constraint=constraint, 1855 ) 1856 1857 def _parse_returning(self) -> t.Optional[exp.Returning]: 1858 if not self._match(TokenType.RETURNING): 1859 return None 1860 return self.expression( 1861 exp.Returning, 1862 expressions=self._parse_csv(self._parse_expression), 1863 into=self._match(TokenType.INTO) and self._parse_table_part(), 1864 ) 1865 1866 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1867 if not self._match(TokenType.FORMAT): 1868 return None 1869 return self._parse_row_format() 1870 1871 def _parse_row_format( 1872 self, match_row: bool = False 1873 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1874 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1875 return None 1876 1877 if self._match_text_seq("SERDE"): 1878 this = self._parse_string() 1879 1880 serde_properties = None 1881 if self._match(TokenType.SERDE_PROPERTIES): 1882 serde_properties = self.expression( 1883 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1884 ) 1885 1886 return self.expression( 1887 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1888 ) 1889 1890 self._match_text_seq("DELIMITED") 1891 1892 kwargs = {} 1893 1894 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1895 kwargs["fields"] = self._parse_string() 1896 if self._match_text_seq("ESCAPED", "BY"): 1897 kwargs["escaped"] = self._parse_string() 1898 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1899 kwargs["collection_items"] = self._parse_string() 1900 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1901 kwargs["map_keys"] = self._parse_string() 1902 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1903 kwargs["lines"] = self._parse_string() 1904 if self._match_text_seq("NULL", "DEFINED", "AS"): 1905 kwargs["null"] = self._parse_string() 1906 1907 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1908 1909 def _parse_load(self) -> exp.LoadData | exp.Command: 1910 if self._match_text_seq("DATA"): 1911 local = self._match_text_seq("LOCAL") 1912 self._match_text_seq("INPATH") 1913 inpath = self._parse_string() 1914 overwrite = self._match(TokenType.OVERWRITE) 1915 self._match_pair(TokenType.INTO, TokenType.TABLE) 1916 1917 return self.expression( 1918 exp.LoadData, 1919 this=self._parse_table(schema=True), 1920 local=local, 1921 overwrite=overwrite, 1922 inpath=inpath, 1923 partition=self._parse_partition(), 1924 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1925 serde=self._match_text_seq("SERDE") and self._parse_string(), 1926 ) 
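        # Illustrative usage (a sketch, not part of the original source; assumes
        # a dialect such as Hive that accepts LOAD DATA). A LOAD DATA statement
        # is parsed into exp.LoadData; any other LOAD variant falls through to
        # the generic exp.Command branch below:
        #
        #     import sqlglot
        #     sqlglot.parse_one(
        #         "LOAD DATA LOCAL INPATH '/tmp/f' OVERWRITE INTO TABLE t", read="hive"
        #     )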
1927 return self._parse_as_command(self._prev) 1928 1929 def _parse_delete(self) -> exp.Delete: 1930 # This handles MySQL's "Multiple-Table Syntax" 1931 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1932 tables = None 1933 comments = self._prev_comments 1934 if not self._match(TokenType.FROM, advance=False): 1935 tables = self._parse_csv(self._parse_table) or None 1936 1937 returning = self._parse_returning() 1938 1939 return self.expression( 1940 exp.Delete, 1941 comments=comments, 1942 tables=tables, 1943 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1944 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1945 where=self._parse_where(), 1946 returning=returning or self._parse_returning(), 1947 limit=self._parse_limit(), 1948 ) 1949 1950 def _parse_update(self) -> exp.Update: 1951 comments = self._prev_comments 1952 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 1953 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1954 returning = self._parse_returning() 1955 return self.expression( 1956 exp.Update, 1957 comments=comments, 1958 **{ # type: ignore 1959 "this": this, 1960 "expressions": expressions, 1961 "from": self._parse_from(joins=True), 1962 "where": self._parse_where(), 1963 "returning": returning or self._parse_returning(), 1964 "order": self._parse_order(), 1965 "limit": self._parse_limit(), 1966 }, 1967 ) 1968 1969 def _parse_uncache(self) -> exp.Uncache: 1970 if not self._match(TokenType.TABLE): 1971 self.raise_error("Expecting TABLE after UNCACHE") 1972 1973 return self.expression( 1974 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1975 ) 1976 1977 def _parse_cache(self) -> exp.Cache: 1978 lazy = self._match_text_seq("LAZY") 1979 self._match(TokenType.TABLE) 1980 table = self._parse_table(schema=True) 1981 1982 options = [] 1983 if self._match_text_seq("OPTIONS"): 1984 self._match_l_paren() 1985 k = self._parse_string() 1986 self._match(TokenType.EQ) 1987 v = self._parse_string() 1988 options = [k, v] 1989 self._match_r_paren() 1990 1991 self._match(TokenType.ALIAS) 1992 return self.expression( 1993 exp.Cache, 1994 this=table, 1995 lazy=lazy, 1996 options=options, 1997 expression=self._parse_select(nested=True), 1998 ) 1999 2000 def _parse_partition(self) -> t.Optional[exp.Partition]: 2001 if not self._match(TokenType.PARTITION): 2002 return None 2003 2004 return self.expression( 2005 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2006 ) 2007 2008 def _parse_value(self) -> exp.Tuple: 2009 if self._match(TokenType.L_PAREN): 2010 expressions = self._parse_csv(self._parse_conjunction) 2011 self._match_r_paren() 2012 return self.expression(exp.Tuple, expressions=expressions) 2013 2014 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
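        # Illustrative usage (a sketch, not part of the original source): in the
        # Presto dialect each bare value becomes a single-element tuple, i.e. one row:
        #
        #     import sqlglot
        #     sqlglot.parse_one("SELECT * FROM (VALUES 1, 2)", read="presto")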
2015 # https://prestodb.io/docs/current/sql/values.html 2016 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2017 2018 def _parse_projections(self) -> t.List[exp.Expression]: 2019 return self._parse_expressions() 2020 2021 def _parse_select( 2022 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2023 ) -> t.Optional[exp.Expression]: 2024 cte = self._parse_with() 2025 2026 if cte: 2027 this = self._parse_statement() 2028 2029 if not this: 2030 self.raise_error("Failed to parse any statement following CTE") 2031 return cte 2032 2033 if "with" in this.arg_types: 2034 this.set("with", cte) 2035 else: 2036 self.raise_error(f"{this.key} does not support CTE") 2037 this = cte 2038 2039 return this 2040 2041 # duckdb supports leading with FROM x 2042 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2043 2044 if self._match(TokenType.SELECT): 2045 comments = self._prev_comments 2046 2047 hint = self._parse_hint() 2048 all_ = self._match(TokenType.ALL) 2049 distinct = self._match_set(self.DISTINCT_TOKENS) 2050 2051 kind = ( 2052 self._match(TokenType.ALIAS) 2053 and self._match_texts(("STRUCT", "VALUE")) 2054 and self._prev.text 2055 ) 2056 2057 if distinct: 2058 distinct = self.expression( 2059 exp.Distinct, 2060 on=self._parse_value() if self._match(TokenType.ON) else None, 2061 ) 2062 2063 if all_ and distinct: 2064 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2065 2066 limit = self._parse_limit(top=True) 2067 projections = self._parse_projections() 2068 2069 this = self.expression( 2070 exp.Select, 2071 kind=kind, 2072 hint=hint, 2073 distinct=distinct, 2074 expressions=projections, 2075 limit=limit, 2076 ) 2077 this.comments = comments 2078 2079 into = self._parse_into() 2080 if into: 2081 this.set("into", into) 2082 2083 if not from_: 2084 from_ = self._parse_from() 2085 2086 if from_: 2087 this.set("from", from_) 2088 2089 this = self._parse_query_modifiers(this) 2090 elif (table or nested) and self._match(TokenType.L_PAREN): 2091 if self._match(TokenType.PIVOT): 2092 this = self._parse_simplified_pivot() 2093 elif self._match(TokenType.FROM): 2094 this = exp.select("*").from_( 2095 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2096 ) 2097 else: 2098 this = self._parse_table() if table else self._parse_select(nested=True) 2099 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2100 2101 self._match_r_paren() 2102 2103 # We return early here so that the UNION isn't attached to the subquery by the 2104 # following call to _parse_set_operations, but instead becomes the parent node 2105 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2106 elif self._match(TokenType.VALUES): 2107 this = self.expression( 2108 exp.Values, 2109 expressions=self._parse_csv(self._parse_value), 2110 alias=self._parse_table_alias(), 2111 ) 2112 elif from_: 2113 this = exp.select("*").from_(from_.this, copy=False) 2114 else: 2115 this = None 2116 2117 return self._parse_set_operations(this) 2118 2119 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2120 if not skip_with_token and not self._match(TokenType.WITH): 2121 return None 2122 2123 comments = self._prev_comments 2124 recursive = self._match(TokenType.RECURSIVE) 2125 2126 expressions = [] 2127 while True: 2128 expressions.append(self._parse_cte()) 2129 2130 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2131 break 2132 else: 2133 
self._match(TokenType.WITH) 2134 2135 return self.expression( 2136 exp.With, comments=comments, expressions=expressions, recursive=recursive 2137 ) 2138 2139 def _parse_cte(self) -> exp.CTE: 2140 alias = self._parse_table_alias() 2141 if not alias or not alias.this: 2142 self.raise_error("Expected CTE to have alias") 2143 2144 self._match(TokenType.ALIAS) 2145 return self.expression( 2146 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2147 ) 2148 2149 def _parse_table_alias( 2150 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2151 ) -> t.Optional[exp.TableAlias]: 2152 any_token = self._match(TokenType.ALIAS) 2153 alias = ( 2154 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2155 or self._parse_string_as_identifier() 2156 ) 2157 2158 index = self._index 2159 if self._match(TokenType.L_PAREN): 2160 columns = self._parse_csv(self._parse_function_parameter) 2161 self._match_r_paren() if columns else self._retreat(index) 2162 else: 2163 columns = None 2164 2165 if not alias and not columns: 2166 return None 2167 2168 return self.expression(exp.TableAlias, this=alias, columns=columns) 2169 2170 def _parse_subquery( 2171 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2172 ) -> t.Optional[exp.Subquery]: 2173 if not this: 2174 return None 2175 2176 return self.expression( 2177 exp.Subquery, 2178 this=this, 2179 pivots=self._parse_pivots(), 2180 alias=self._parse_table_alias() if parse_alias else None, 2181 ) 2182 2183 def _parse_query_modifiers( 2184 self, this: t.Optional[exp.Expression] 2185 ) -> t.Optional[exp.Expression]: 2186 if isinstance(this, self.MODIFIABLES): 2187 for join in iter(self._parse_join, None): 2188 this.append("joins", join) 2189 for lateral in iter(self._parse_lateral, None): 2190 this.append("laterals", lateral) 2191 2192 while True: 2193 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2194 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2195 key, expression = parser(self) 2196 2197 if expression: 2198 this.set(key, expression) 2199 if key == "limit": 2200 offset = expression.args.pop("offset", None) 2201 if offset: 2202 this.set("offset", exp.Offset(expression=offset)) 2203 continue 2204 break 2205 return this 2206 2207 def _parse_hint(self) -> t.Optional[exp.Hint]: 2208 if self._match(TokenType.HINT): 2209 hints = [] 2210 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2211 hints.extend(hint) 2212 2213 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2214 self.raise_error("Expected */ after HINT") 2215 2216 return self.expression(exp.Hint, expressions=hints) 2217 2218 return None 2219 2220 def _parse_into(self) -> t.Optional[exp.Into]: 2221 if not self._match(TokenType.INTO): 2222 return None 2223 2224 temp = self._match(TokenType.TEMPORARY) 2225 unlogged = self._match_text_seq("UNLOGGED") 2226 self._match(TokenType.TABLE) 2227 2228 return self.expression( 2229 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2230 ) 2231 2232 def _parse_from( 2233 self, joins: bool = False, skip_from_token: bool = False 2234 ) -> t.Optional[exp.From]: 2235 if not skip_from_token and not self._match(TokenType.FROM): 2236 return None 2237 2238 return self.expression( 2239 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2240 ) 2241 2242 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2243 if not self._match(TokenType.MATCH_RECOGNIZE): 2244 return None 2245 2246 
self._match_l_paren() 2247 2248 partition = self._parse_partition_by() 2249 order = self._parse_order() 2250 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2251 2252 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2253 rows = exp.var("ONE ROW PER MATCH") 2254 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2255 text = "ALL ROWS PER MATCH" 2256 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2257 text += f" SHOW EMPTY MATCHES" 2258 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2259 text += f" OMIT EMPTY MATCHES" 2260 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2261 text += f" WITH UNMATCHED ROWS" 2262 rows = exp.var(text) 2263 else: 2264 rows = None 2265 2266 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2267 text = "AFTER MATCH SKIP" 2268 if self._match_text_seq("PAST", "LAST", "ROW"): 2269 text += f" PAST LAST ROW" 2270 elif self._match_text_seq("TO", "NEXT", "ROW"): 2271 text += f" TO NEXT ROW" 2272 elif self._match_text_seq("TO", "FIRST"): 2273 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2274 elif self._match_text_seq("TO", "LAST"): 2275 text += f" TO LAST {self._advance_any().text}" # type: ignore 2276 after = exp.var(text) 2277 else: 2278 after = None 2279 2280 if self._match_text_seq("PATTERN"): 2281 self._match_l_paren() 2282 2283 if not self._curr: 2284 self.raise_error("Expecting )", self._curr) 2285 2286 paren = 1 2287 start = self._curr 2288 2289 while self._curr and paren > 0: 2290 if self._curr.token_type == TokenType.L_PAREN: 2291 paren += 1 2292 if self._curr.token_type == TokenType.R_PAREN: 2293 paren -= 1 2294 2295 end = self._prev 2296 self._advance() 2297 2298 if paren > 0: 2299 self.raise_error("Expecting )", self._curr) 2300 2301 pattern = exp.var(self._find_sql(start, end)) 2302 else: 2303 pattern = None 2304 2305 define = ( 2306 self._parse_csv( 2307 lambda: self.expression( 2308 exp.Alias, 2309 alias=self._parse_id_var(any_token=True), 2310 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2311 ) 2312 ) 2313 if self._match_text_seq("DEFINE") 2314 else None 2315 ) 2316 2317 self._match_r_paren() 2318 2319 return self.expression( 2320 exp.MatchRecognize, 2321 partition_by=partition, 2322 order=order, 2323 measures=measures, 2324 rows=rows, 2325 after=after, 2326 pattern=pattern, 2327 define=define, 2328 alias=self._parse_table_alias(), 2329 ) 2330 2331 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2332 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2333 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2334 2335 if outer_apply or cross_apply: 2336 this = self._parse_select(table=True) 2337 view = None 2338 outer = not cross_apply 2339 elif self._match(TokenType.LATERAL): 2340 this = self._parse_select(table=True) 2341 view = self._match(TokenType.VIEW) 2342 outer = self._match(TokenType.OUTER) 2343 else: 2344 return None 2345 2346 if not this: 2347 this = ( 2348 self._parse_unnest() 2349 or self._parse_function() 2350 or self._parse_id_var(any_token=False) 2351 ) 2352 2353 while self._match(TokenType.DOT): 2354 this = exp.Dot( 2355 this=this, 2356 expression=self._parse_function() or self._parse_id_var(any_token=False), 2357 ) 2358 2359 if view: 2360 table = self._parse_id_var(any_token=False) 2361 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2362 table_alias: t.Optional[exp.TableAlias] = self.expression( 2363 exp.TableAlias, this=table, columns=columns 2364 ) 2365 elif 
isinstance(this, exp.Subquery) and this.alias: 2366 # Ensures parity between the Subquery's and the Lateral's "alias" args 2367 table_alias = this.args["alias"].copy() 2368 else: 2369 table_alias = self._parse_table_alias() 2370 2371 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2372 2373 def _parse_join_parts( 2374 self, 2375 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2376 return ( 2377 self._match_set(self.JOIN_METHODS) and self._prev, 2378 self._match_set(self.JOIN_SIDES) and self._prev, 2379 self._match_set(self.JOIN_KINDS) and self._prev, 2380 ) 2381 2382 def _parse_join( 2383 self, skip_join_token: bool = False, parse_bracket: bool = False 2384 ) -> t.Optional[exp.Join]: 2385 if self._match(TokenType.COMMA): 2386 return self.expression(exp.Join, this=self._parse_table()) 2387 2388 index = self._index 2389 method, side, kind = self._parse_join_parts() 2390 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2391 join = self._match(TokenType.JOIN) 2392 2393 if not skip_join_token and not join: 2394 self._retreat(index) 2395 kind = None 2396 method = None 2397 side = None 2398 2399 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2400 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2401 2402 if not skip_join_token and not join and not outer_apply and not cross_apply: 2403 return None 2404 2405 if outer_apply: 2406 side = Token(TokenType.LEFT, "LEFT") 2407 2408 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2409 2410 if method: 2411 kwargs["method"] = method.text 2412 if side: 2413 kwargs["side"] = side.text 2414 if kind: 2415 kwargs["kind"] = kind.text 2416 if hint: 2417 kwargs["hint"] = hint 2418 2419 if self._match(TokenType.ON): 2420 kwargs["on"] = self._parse_conjunction() 2421 elif self._match(TokenType.USING): 2422 kwargs["using"] = self._parse_wrapped_id_vars() 2423 elif not (kind and kind.token_type == TokenType.CROSS): 2424 index = self._index 2425 joins = self._parse_joins() 2426 2427 if joins and self._match(TokenType.ON): 2428 kwargs["on"] = self._parse_conjunction() 2429 elif joins and self._match(TokenType.USING): 2430 kwargs["using"] = self._parse_wrapped_id_vars() 2431 else: 2432 joins = None 2433 self._retreat(index) 2434 2435 kwargs["this"].set("joins", joins) 2436 2437 comments = [c for token in (method, side, kind) if token for c in token.comments] 2438 return self.expression(exp.Join, comments=comments, **kwargs) 2439 2440 def _parse_index( 2441 self, 2442 index: t.Optional[exp.Expression] = None, 2443 ) -> t.Optional[exp.Index]: 2444 if index: 2445 unique = None 2446 primary = None 2447 amp = None 2448 2449 self._match(TokenType.ON) 2450 self._match(TokenType.TABLE) # hive 2451 table = self._parse_table_parts(schema=True) 2452 else: 2453 unique = self._match(TokenType.UNIQUE) 2454 primary = self._match_text_seq("PRIMARY") 2455 amp = self._match_text_seq("AMP") 2456 2457 if not self._match(TokenType.INDEX): 2458 return None 2459 2460 index = self._parse_id_var() 2461 table = None 2462 2463 using = self._parse_field() if self._match(TokenType.USING) else None 2464 2465 if self._match(TokenType.L_PAREN, advance=False): 2466 columns = self._parse_wrapped_csv(self._parse_ordered) 2467 else: 2468 columns = None 2469 2470 return self.expression( 2471 exp.Index, 2472 this=index, 2473 table=table, 2474 using=using, 2475 columns=columns, 2476 unique=unique, 2477 primary=primary, 2478 amp=amp, 2479 
partition_by=self._parse_partition_by(), 2480 ) 2481 2482 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2483 hints: t.List[exp.Expression] = [] 2484 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2485 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2486 hints.append( 2487 self.expression( 2488 exp.WithTableHint, 2489 expressions=self._parse_csv( 2490 lambda: self._parse_function() or self._parse_var(any_token=True) 2491 ), 2492 ) 2493 ) 2494 self._match_r_paren() 2495 else: 2496 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2497 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2498 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2499 2500 self._match_texts({"INDEX", "KEY"}) 2501 if self._match(TokenType.FOR): 2502 hint.set("target", self._advance_any() and self._prev.text.upper()) 2503 2504 hint.set("expressions", self._parse_wrapped_id_vars()) 2505 hints.append(hint) 2506 2507 return hints or None 2508 2509 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2510 return ( 2511 (not schema and self._parse_function(optional_parens=False)) 2512 or self._parse_id_var(any_token=False) 2513 or self._parse_string_as_identifier() 2514 or self._parse_placeholder() 2515 ) 2516 2517 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2518 catalog = None 2519 db = None 2520 table = self._parse_table_part(schema=schema) 2521 2522 while self._match(TokenType.DOT): 2523 if catalog: 2524 # This allows nesting the table in arbitrarily many dot expressions if needed 2525 table = self.expression( 2526 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2527 ) 2528 else: 2529 catalog = db 2530 db = table 2531 table = self._parse_table_part(schema=schema) 2532 2533 if not table: 2534 self.raise_error(f"Expected table name but got {self._curr}") 2535 2536 return self.expression( 2537 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2538 ) 2539 2540 def _parse_table( 2541 self, 2542 schema: bool = False, 2543 joins: bool = False, 2544 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2545 parse_bracket: bool = False, 2546 ) -> t.Optional[exp.Expression]: 2547 lateral = self._parse_lateral() 2548 if lateral: 2549 return lateral 2550 2551 unnest = self._parse_unnest() 2552 if unnest: 2553 return unnest 2554 2555 values = self._parse_derived_table_values() 2556 if values: 2557 return values 2558 2559 subquery = self._parse_select(table=True) 2560 if subquery: 2561 if not subquery.args.get("pivots"): 2562 subquery.set("pivots", self._parse_pivots()) 2563 return subquery 2564 2565 bracket = parse_bracket and self._parse_bracket(None) 2566 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2567 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2568 2569 if schema: 2570 return self._parse_schema(this=this) 2571 2572 version = self._parse_version() 2573 2574 if version: 2575 this.set("version", version) 2576 2577 if self.ALIAS_POST_TABLESAMPLE: 2578 table_sample = self._parse_table_sample() 2579 2580 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2581 if alias: 2582 this.set("alias", alias) 2583 2584 this.set("hints", self._parse_table_hints()) 2585 2586 if not this.args.get("pivots"): 2587 this.set("pivots", self._parse_pivots()) 2588 2589 if not self.ALIAS_POST_TABLESAMPLE: 2590 table_sample = self._parse_table_sample() 2591 2592 if 
table_sample: 2593 table_sample.set("this", this) 2594 this = table_sample 2595 2596 if joins: 2597 for join in iter(self._parse_join, None): 2598 this.append("joins", join) 2599 2600 return this 2601 2602 def _parse_version(self) -> t.Optional[exp.Version]: 2603 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2604 this = "TIMESTAMP" 2605 elif self._match(TokenType.VERSION_SNAPSHOT): 2606 this = "VERSION" 2607 else: 2608 return None 2609 2610 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2611 kind = self._prev.text.upper() 2612 start = self._parse_bitwise() 2613 self._match_texts(("TO", "AND")) 2614 end = self._parse_bitwise() 2615 expression: t.Optional[exp.Expression] = self.expression( 2616 exp.Tuple, expressions=[start, end] 2617 ) 2618 elif self._match_text_seq("CONTAINED", "IN"): 2619 kind = "CONTAINED IN" 2620 expression = self.expression( 2621 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2622 ) 2623 elif self._match(TokenType.ALL): 2624 kind = "ALL" 2625 expression = None 2626 else: 2627 self._match_text_seq("AS", "OF") 2628 kind = "AS OF" 2629 expression = self._parse_type() 2630 2631 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2632 2633 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2634 if not self._match(TokenType.UNNEST): 2635 return None 2636 2637 expressions = self._parse_wrapped_csv(self._parse_type) 2638 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2639 2640 alias = self._parse_table_alias() if with_alias else None 2641 2642 if alias and self.UNNEST_COLUMN_ONLY: 2643 if alias.args.get("columns"): 2644 self.raise_error("Unexpected extra column alias in unnest.") 2645 2646 alias.set("columns", [alias.this]) 2647 alias.set("this", None) 2648 2649 offset = None 2650 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2651 self._match(TokenType.ALIAS) 2652 offset = self._parse_id_var() or exp.to_identifier("offset") 2653 2654 return self.expression( 2655 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2656 ) 2657 2658 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2659 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2660 if not is_derived and not self._match(TokenType.VALUES): 2661 return None 2662 2663 expressions = self._parse_csv(self._parse_value) 2664 alias = self._parse_table_alias() 2665 2666 if is_derived: 2667 self._match_r_paren() 2668 2669 return self.expression( 2670 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2671 ) 2672 2673 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2674 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2675 as_modifier and self._match_text_seq("USING", "SAMPLE") 2676 ): 2677 return None 2678 2679 bucket_numerator = None 2680 bucket_denominator = None 2681 bucket_field = None 2682 percent = None 2683 rows = None 2684 size = None 2685 seed = None 2686 2687 kind = ( 2688 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2689 ) 2690 method = self._parse_var(tokens=(TokenType.ROW,)) 2691 2692 self._match(TokenType.L_PAREN) 2693 2694 if self.TABLESAMPLE_CSV: 2695 num = None 2696 expressions = self._parse_csv(self._parse_primary) 2697 else: 2698 expressions = None 2699 num = self._parse_primary() 2700 2701 if self._match_text_seq("BUCKET"): 2702 bucket_numerator = self._parse_number() 2703 self._match_text_seq("OUT", "OF") 2704 
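        # Illustrative usage (a sketch, not part of the original source; assumes
        # Hive's bucket sampling syntax):
        #
        #     import sqlglot
        #     sqlglot.parse_one(
        #         "SELECT * FROM t TABLESAMPLE (BUCKET 3 OUT OF 16 ON id)", read="hive"
        #     )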
bucket_denominator = self._parse_number() 2705 self._match(TokenType.ON) 2706 bucket_field = self._parse_field() 2707 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2708 percent = num 2709 elif self._match(TokenType.ROWS): 2710 rows = num 2711 elif num: 2712 size = num 2713 2714 self._match(TokenType.R_PAREN) 2715 2716 if self._match(TokenType.L_PAREN): 2717 method = self._parse_var() 2718 seed = self._match(TokenType.COMMA) and self._parse_number() 2719 self._match_r_paren() 2720 elif self._match_texts(("SEED", "REPEATABLE")): 2721 seed = self._parse_wrapped(self._parse_number) 2722 2723 return self.expression( 2724 exp.TableSample, 2725 expressions=expressions, 2726 method=method, 2727 bucket_numerator=bucket_numerator, 2728 bucket_denominator=bucket_denominator, 2729 bucket_field=bucket_field, 2730 percent=percent, 2731 rows=rows, 2732 size=size, 2733 seed=seed, 2734 kind=kind, 2735 ) 2736 2737 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2738 return list(iter(self._parse_pivot, None)) or None 2739 2740 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2741 return list(iter(self._parse_join, None)) or None 2742 2743 # https://duckdb.org/docs/sql/statements/pivot 2744 def _parse_simplified_pivot(self) -> exp.Pivot: 2745 def _parse_on() -> t.Optional[exp.Expression]: 2746 this = self._parse_bitwise() 2747 return self._parse_in(this) if self._match(TokenType.IN) else this 2748 2749 this = self._parse_table() 2750 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2751 using = self._match(TokenType.USING) and self._parse_csv( 2752 lambda: self._parse_alias(self._parse_function()) 2753 ) 2754 group = self._parse_group() 2755 return self.expression( 2756 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2757 ) 2758 2759 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2760 index = self._index 2761 include_nulls = None 2762 2763 if self._match(TokenType.PIVOT): 2764 unpivot = False 2765 elif self._match(TokenType.UNPIVOT): 2766 unpivot = True 2767 2768 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2769 if self._match_text_seq("INCLUDE", "NULLS"): 2770 include_nulls = True 2771 elif self._match_text_seq("EXCLUDE", "NULLS"): 2772 include_nulls = False 2773 else: 2774 return None 2775 2776 expressions = [] 2777 field = None 2778 2779 if not self._match(TokenType.L_PAREN): 2780 self._retreat(index) 2781 return None 2782 2783 if unpivot: 2784 expressions = self._parse_csv(self._parse_column) 2785 else: 2786 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2787 2788 if not expressions: 2789 self.raise_error("Failed to parse PIVOT's aggregation list") 2790 2791 if not self._match(TokenType.FOR): 2792 self.raise_error("Expecting FOR") 2793 2794 value = self._parse_column() 2795 2796 if not self._match(TokenType.IN): 2797 self.raise_error("Expecting IN") 2798 2799 field = self._parse_in(value, alias=True) 2800 2801 self._match_r_paren() 2802 2803 pivot = self.expression( 2804 exp.Pivot, 2805 expressions=expressions, 2806 field=field, 2807 unpivot=unpivot, 2808 include_nulls=include_nulls, 2809 ) 2810 2811 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2812 pivot.set("alias", self._parse_table_alias()) 2813 2814 if not unpivot: 2815 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2816 2817 columns: t.List[exp.Expression] = [] 2818 for fld in
pivot.args["field"].expressions: 2819 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2820 for name in names: 2821 if self.PREFIXED_PIVOT_COLUMNS: 2822 name = f"{name}_{field_name}" if name else field_name 2823 else: 2824 name = f"{field_name}_{name}" if name else field_name 2825 2826 columns.append(exp.to_identifier(name)) 2827 2828 pivot.set("columns", columns) 2829 2830 return pivot 2831 2832 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2833 return [agg.alias for agg in aggregations] 2834 2835 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2836 if not skip_where_token and not self._match(TokenType.WHERE): 2837 return None 2838 2839 return self.expression( 2840 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2841 ) 2842 2843 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2844 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2845 return None 2846 2847 elements = defaultdict(list) 2848 2849 if self._match(TokenType.ALL): 2850 return self.expression(exp.Group, all=True) 2851 2852 while True: 2853 expressions = self._parse_csv(self._parse_conjunction) 2854 if expressions: 2855 elements["expressions"].extend(expressions) 2856 2857 grouping_sets = self._parse_grouping_sets() 2858 if grouping_sets: 2859 elements["grouping_sets"].extend(grouping_sets) 2860 2861 rollup = None 2862 cube = None 2863 totals = None 2864 2865 with_ = self._match(TokenType.WITH) 2866 if self._match(TokenType.ROLLUP): 2867 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2868 elements["rollup"].extend(ensure_list(rollup)) 2869 2870 if self._match(TokenType.CUBE): 2871 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2872 elements["cube"].extend(ensure_list(cube)) 2873 2874 if self._match_text_seq("TOTALS"): 2875 totals = True 2876 elements["totals"] = True # type: ignore 2877 2878 if not (grouping_sets or rollup or cube or totals): 2879 break 2880 2881 return self.expression(exp.Group, **elements) # type: ignore 2882 2883 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2884 if not self._match(TokenType.GROUPING_SETS): 2885 return None 2886 2887 return self._parse_wrapped_csv(self._parse_grouping_set) 2888 2889 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2890 if self._match(TokenType.L_PAREN): 2891 grouping_set = self._parse_csv(self._parse_column) 2892 self._match_r_paren() 2893 return self.expression(exp.Tuple, expressions=grouping_set) 2894 2895 return self._parse_column() 2896 2897 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2898 if not skip_having_token and not self._match(TokenType.HAVING): 2899 return None 2900 return self.expression(exp.Having, this=self._parse_conjunction()) 2901 2902 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2903 if not self._match(TokenType.QUALIFY): 2904 return None 2905 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2906 2907 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2908 if skip_start_token: 2909 start = None 2910 elif self._match(TokenType.START_WITH): 2911 start = self._parse_conjunction() 2912 else: 2913 return None 2914 2915 self._match(TokenType.CONNECT_BY) 2916 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2917 exp.Prior, this=self._parse_bitwise() 2918 ) 2919 connect = self._parse_conjunction() 2920 
self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2921 2922 if not start and self._match(TokenType.START_WITH): 2923 start = self._parse_conjunction() 2924 2925 return self.expression(exp.Connect, start=start, connect=connect) 2926 2927 def _parse_order( 2928 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2929 ) -> t.Optional[exp.Expression]: 2930 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2931 return this 2932 2933 return self.expression( 2934 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2935 ) 2936 2937 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2938 if not self._match(token): 2939 return None 2940 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2941 2942 def _parse_ordered(self) -> exp.Ordered: 2943 this = self._parse_conjunction() 2944 self._match(TokenType.ASC) 2945 2946 is_desc = self._match(TokenType.DESC) 2947 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2948 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2949 desc = is_desc or False 2950 asc = not desc 2951 nulls_first = is_nulls_first or False 2952 explicitly_null_ordered = is_nulls_first or is_nulls_last 2953 2954 if ( 2955 not explicitly_null_ordered 2956 and ( 2957 (asc and self.NULL_ORDERING == "nulls_are_small") 2958 or (desc and self.NULL_ORDERING != "nulls_are_small") 2959 ) 2960 and self.NULL_ORDERING != "nulls_are_last" 2961 ): 2962 nulls_first = True 2963 2964 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2965 2966 def _parse_limit( 2967 self, this: t.Optional[exp.Expression] = None, top: bool = False 2968 ) -> t.Optional[exp.Expression]: 2969 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2970 comments = self._prev_comments 2971 if top: 2972 limit_paren = self._match(TokenType.L_PAREN) 2973 expression = self._parse_number() 2974 2975 if limit_paren: 2976 self._match_r_paren() 2977 else: 2978 expression = self._parse_term() 2979 2980 if self._match(TokenType.COMMA): 2981 offset = expression 2982 expression = self._parse_term() 2983 else: 2984 offset = None 2985 2986 limit_exp = self.expression( 2987 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2988 ) 2989 2990 return limit_exp 2991 2992 if self._match(TokenType.FETCH): 2993 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2994 direction = self._prev.text if direction else "FIRST" 2995 2996 count = self._parse_field(tokens=self.FETCH_TOKENS) 2997 percent = self._match(TokenType.PERCENT) 2998 2999 self._match_set((TokenType.ROW, TokenType.ROWS)) 3000 3001 only = self._match_text_seq("ONLY") 3002 with_ties = self._match_text_seq("WITH", "TIES") 3003 3004 if only and with_ties: 3005 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3006 3007 return self.expression( 3008 exp.Fetch, 3009 direction=direction, 3010 count=count, 3011 percent=percent, 3012 with_ties=with_ties, 3013 ) 3014 3015 return this 3016 3017 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3018 if not self._match(TokenType.OFFSET): 3019 return this 3020 3021 count = self._parse_term() 3022 self._match_set((TokenType.ROW, TokenType.ROWS)) 3023 return self.expression(exp.Offset, this=this, expression=count) 3024 3025 def _parse_locks(self) -> t.List[exp.Lock]: 3026 locks = [] 3027 while True: 3028 if self._match_text_seq("FOR", "UPDATE"): 3029 update = True 3030 elif 
self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3031 "LOCK", "IN", "SHARE", "MODE" 3032 ): 3033 update = False 3034 else: 3035 break 3036 3037 expressions = None 3038 if self._match_text_seq("OF"): 3039 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3040 3041 wait: t.Optional[bool | exp.Expression] = None 3042 if self._match_text_seq("NOWAIT"): 3043 wait = True 3044 elif self._match_text_seq("WAIT"): 3045 wait = self._parse_primary() 3046 elif self._match_text_seq("SKIP", "LOCKED"): 3047 wait = False 3048 3049 locks.append( 3050 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3051 ) 3052 3053 return locks 3054 3055 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3056 if not self._match_set(self.SET_OPERATIONS): 3057 return this 3058 3059 token_type = self._prev.token_type 3060 3061 if token_type == TokenType.UNION: 3062 expression = exp.Union 3063 elif token_type == TokenType.EXCEPT: 3064 expression = exp.Except 3065 else: 3066 expression = exp.Intersect 3067 3068 return self.expression( 3069 expression, 3070 this=this, 3071 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3072 by_name=self._match_text_seq("BY", "NAME"), 3073 expression=self._parse_set_operations(self._parse_select(nested=True)), 3074 ) 3075 3076 def _parse_expression(self) -> t.Optional[exp.Expression]: 3077 return self._parse_alias(self._parse_conjunction()) 3078 3079 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3080 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3081 3082 def _parse_equality(self) -> t.Optional[exp.Expression]: 3083 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3084 3085 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3086 return self._parse_tokens(self._parse_range, self.COMPARISON) 3087 3088 def _parse_range(self) -> t.Optional[exp.Expression]: 3089 this = self._parse_bitwise() 3090 negate = self._match(TokenType.NOT) 3091 3092 if self._match_set(self.RANGE_PARSERS): 3093 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3094 if not expression: 3095 return this 3096 3097 this = expression 3098 elif self._match(TokenType.ISNULL): 3099 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3100 3101 # Postgres supports ISNULL and NOTNULL for conditions. 
3102 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3103 if self._match(TokenType.NOTNULL): 3104 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3105 this = self.expression(exp.Not, this=this) 3106 3107 if negate: 3108 this = self.expression(exp.Not, this=this) 3109 3110 if self._match(TokenType.IS): 3111 this = self._parse_is(this) 3112 3113 return this 3114 3115 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3116 index = self._index - 1 3117 negate = self._match(TokenType.NOT) 3118 3119 if self._match_text_seq("DISTINCT", "FROM"): 3120 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3121 return self.expression(klass, this=this, expression=self._parse_expression()) 3122 3123 expression = self._parse_null() or self._parse_boolean() 3124 if not expression: 3125 self._retreat(index) 3126 return None 3127 3128 this = self.expression(exp.Is, this=this, expression=expression) 3129 return self.expression(exp.Not, this=this) if negate else this 3130 3131 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3132 unnest = self._parse_unnest(with_alias=False) 3133 if unnest: 3134 this = self.expression(exp.In, this=this, unnest=unnest) 3135 elif self._match(TokenType.L_PAREN): 3136 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3137 3138 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3139 this = self.expression(exp.In, this=this, query=expressions[0]) 3140 else: 3141 this = self.expression(exp.In, this=this, expressions=expressions) 3142 3143 self._match_r_paren(this) 3144 else: 3145 this = self.expression(exp.In, this=this, field=self._parse_field()) 3146 3147 return this 3148 3149 def _parse_between(self, this: exp.Expression) -> exp.Between: 3150 low = self._parse_bitwise() 3151 self._match(TokenType.AND) 3152 high = self._parse_bitwise() 3153 return self.expression(exp.Between, this=this, low=low, high=high) 3154 3155 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3156 if not self._match(TokenType.ESCAPE): 3157 return this 3158 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3159 3160 def _parse_interval(self) -> t.Optional[exp.Interval]: 3161 index = self._index 3162 3163 if not self._match(TokenType.INTERVAL): 3164 return None 3165 3166 if self._match(TokenType.STRING, advance=False): 3167 this = self._parse_primary() 3168 else: 3169 this = self._parse_term() 3170 3171 if not this: 3172 self._retreat(index) 3173 return None 3174 3175 unit = self._parse_function() or self._parse_var(any_token=True) 3176 3177 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3178 # each INTERVAL expression into this canonical form so it's easy to transpile 3179 if this and this.is_number: 3180 this = exp.Literal.string(this.name) 3181 elif this and this.is_string: 3182 parts = this.name.split() 3183 3184 if len(parts) == 2: 3185 if unit: 3186 # This is not actually a unit, it's something else (e.g. 
a "window side") 3187 unit = None 3188 self._retreat(self._index - 1) 3189 3190 this = exp.Literal.string(parts[0]) 3191 unit = self.expression(exp.Var, this=parts[1]) 3192 3193 return self.expression(exp.Interval, this=this, unit=unit) 3194 3195 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3196 this = self._parse_term() 3197 3198 while True: 3199 if self._match_set(self.BITWISE): 3200 this = self.expression( 3201 self.BITWISE[self._prev.token_type], 3202 this=this, 3203 expression=self._parse_term(), 3204 ) 3205 elif self._match(TokenType.DQMARK): 3206 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3207 elif self._match_pair(TokenType.LT, TokenType.LT): 3208 this = self.expression( 3209 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3210 ) 3211 elif self._match_pair(TokenType.GT, TokenType.GT): 3212 this = self.expression( 3213 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3214 ) 3215 else: 3216 break 3217 3218 return this 3219 3220 def _parse_term(self) -> t.Optional[exp.Expression]: 3221 return self._parse_tokens(self._parse_factor, self.TERM) 3222 3223 def _parse_factor(self) -> t.Optional[exp.Expression]: 3224 return self._parse_tokens(self._parse_unary, self.FACTOR) 3225 3226 def _parse_unary(self) -> t.Optional[exp.Expression]: 3227 if self._match_set(self.UNARY_PARSERS): 3228 return self.UNARY_PARSERS[self._prev.token_type](self) 3229 return self._parse_at_time_zone(self._parse_type()) 3230 3231 def _parse_type(self) -> t.Optional[exp.Expression]: 3232 interval = self._parse_interval() 3233 if interval: 3234 return interval 3235 3236 index = self._index 3237 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3238 this = self._parse_column() 3239 3240 if data_type: 3241 if isinstance(this, exp.Literal): 3242 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3243 if parser: 3244 return parser(self, this, data_type) 3245 return self.expression(exp.Cast, this=this, to=data_type) 3246 if not data_type.expressions: 3247 self._retreat(index) 3248 return self._parse_column() 3249 return self._parse_column_ops(data_type) 3250 3251 return this 3252 3253 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3254 this = self._parse_type() 3255 if not this: 3256 return None 3257 3258 return self.expression( 3259 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3260 ) 3261 3262 def _parse_types( 3263 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3264 ) -> t.Optional[exp.Expression]: 3265 index = self._index 3266 3267 prefix = self._match_text_seq("SYSUDTLIB", ".") 3268 3269 if not self._match_set(self.TYPE_TOKENS): 3270 identifier = allow_identifiers and self._parse_id_var( 3271 any_token=False, tokens=(TokenType.VAR,) 3272 ) 3273 3274 if identifier: 3275 tokens = self._tokenizer.tokenize(identifier.name) 3276 3277 if len(tokens) != 1: 3278 self.raise_error("Unexpected identifier", self._prev) 3279 3280 if tokens[0].token_type in self.TYPE_TOKENS: 3281 self._prev = tokens[0] 3282 elif self.SUPPORTS_USER_DEFINED_TYPES: 3283 type_name = identifier.name 3284 3285 while self._match(TokenType.DOT): 3286 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3287 3288 return exp.DataType.build(type_name, udt=True) 3289 else: 3290 return None 3291 else: 3292 return None 3293 3294 type_token = self._prev.token_type 3295 3296 if type_token == TokenType.PSEUDO_TYPE: 3297 return self.expression(exp.PseudoType, 
this=self._prev.text) 3298 3299 if type_token == TokenType.OBJECT_IDENTIFIER: 3300 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3301 3302 nested = type_token in self.NESTED_TYPE_TOKENS 3303 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3304 expressions = None 3305 maybe_func = False 3306 3307 if self._match(TokenType.L_PAREN): 3308 if is_struct: 3309 expressions = self._parse_csv(self._parse_struct_types) 3310 elif nested: 3311 expressions = self._parse_csv( 3312 lambda: self._parse_types( 3313 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3314 ) 3315 ) 3316 elif type_token in self.ENUM_TYPE_TOKENS: 3317 expressions = self._parse_csv(self._parse_equality) 3318 else: 3319 expressions = self._parse_csv(self._parse_type_size) 3320 3321 if not expressions or not self._match(TokenType.R_PAREN): 3322 self._retreat(index) 3323 return None 3324 3325 maybe_func = True 3326 3327 this: t.Optional[exp.Expression] = None 3328 values: t.Optional[t.List[exp.Expression]] = None 3329 3330 if nested and self._match(TokenType.LT): 3331 if is_struct: 3332 expressions = self._parse_csv(self._parse_struct_types) 3333 else: 3334 expressions = self._parse_csv( 3335 lambda: self._parse_types( 3336 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3337 ) 3338 ) 3339 3340 if not self._match(TokenType.GT): 3341 self.raise_error("Expecting >") 3342 3343 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3344 values = self._parse_csv(self._parse_conjunction) 3345 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3346 3347 if type_token in self.TIMESTAMPS: 3348 if self._match_text_seq("WITH", "TIME", "ZONE"): 3349 maybe_func = False 3350 tz_type = ( 3351 exp.DataType.Type.TIMETZ 3352 if type_token in self.TIMES 3353 else exp.DataType.Type.TIMESTAMPTZ 3354 ) 3355 this = exp.DataType(this=tz_type, expressions=expressions) 3356 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3357 maybe_func = False 3358 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3359 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3360 maybe_func = False 3361 elif type_token == TokenType.INTERVAL: 3362 unit = self._parse_var() 3363 3364 if self._match_text_seq("TO"): 3365 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3366 else: 3367 span = None 3368 3369 if span or not unit: 3370 this = self.expression( 3371 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3372 ) 3373 else: 3374 this = self.expression(exp.Interval, unit=unit) 3375 3376 if maybe_func and check_func: 3377 index2 = self._index 3378 peek = self._parse_string() 3379 3380 if not peek: 3381 self._retreat(index) 3382 return None 3383 3384 self._retreat(index2) 3385 3386 if not this: 3387 if self._match_text_seq("UNSIGNED"): 3388 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3389 if not unsigned_type_token: 3390 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3391 3392 type_token = unsigned_type_token or type_token 3393 3394 this = exp.DataType( 3395 this=exp.DataType.Type[type_token.value], 3396 expressions=expressions, 3397 nested=nested, 3398 values=values, 3399 prefix=prefix, 3400 ) 3401 3402 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3403 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3404 3405 return this 3406 3407 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3408 this = 
self._parse_type() or self._parse_id_var() 3409 self._match(TokenType.COLON) 3410 return self._parse_column_def(this) 3411 3412 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3413 if not self._match_text_seq("AT", "TIME", "ZONE"): 3414 return this 3415 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3416 3417 def _parse_column(self) -> t.Optional[exp.Expression]: 3418 this = self._parse_field() 3419 if isinstance(this, exp.Identifier): 3420 this = self.expression(exp.Column, this=this) 3421 elif not this: 3422 return self._parse_bracket(this) 3423 return self._parse_column_ops(this) 3424 3425 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3426 this = self._parse_bracket(this) 3427 3428 while self._match_set(self.COLUMN_OPERATORS): 3429 op_token = self._prev.token_type 3430 op = self.COLUMN_OPERATORS.get(op_token) 3431 3432 if op_token == TokenType.DCOLON: 3433 field = self._parse_types() 3434 if not field: 3435 self.raise_error("Expected type") 3436 elif op and self._curr: 3437 self._advance() 3438 value = self._prev.text 3439 field = ( 3440 exp.Literal.number(value) 3441 if self._prev.token_type == TokenType.NUMBER 3442 else exp.Literal.string(value) 3443 ) 3444 else: 3445 field = self._parse_field(anonymous_func=True, any_token=True) 3446 3447 if isinstance(field, exp.Func): 3448 # bigquery allows function calls like x.y.count(...) 3449 # SAFE.SUBSTR(...) 3450 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3451 this = self._replace_columns_with_dots(this) 3452 3453 if op: 3454 this = op(self, this, field) 3455 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3456 this = self.expression( 3457 exp.Column, 3458 this=field, 3459 table=this.this, 3460 db=this.args.get("table"), 3461 catalog=this.args.get("db"), 3462 ) 3463 else: 3464 this = self.expression(exp.Dot, this=this, expression=field) 3465 this = self._parse_bracket(this) 3466 return this 3467 3468 def _parse_primary(self) -> t.Optional[exp.Expression]: 3469 if self._match_set(self.PRIMARY_PARSERS): 3470 token_type = self._prev.token_type 3471 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3472 3473 if token_type == TokenType.STRING: 3474 expressions = [primary] 3475 while self._match(TokenType.STRING): 3476 expressions.append(exp.Literal.string(self._prev.text)) 3477 3478 if len(expressions) > 1: 3479 return self.expression(exp.Concat, expressions=expressions) 3480 3481 return primary 3482 3483 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3484 return exp.Literal.number(f"0.{self._prev.text}") 3485 3486 if self._match(TokenType.L_PAREN): 3487 comments = self._prev_comments 3488 query = self._parse_select() 3489 3490 if query: 3491 expressions = [query] 3492 else: 3493 expressions = self._parse_expressions() 3494 3495 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3496 3497 if isinstance(this, exp.Subqueryable): 3498 this = self._parse_set_operations( 3499 self._parse_subquery(this=this, parse_alias=False) 3500 ) 3501 elif len(expressions) > 1: 3502 this = self.expression(exp.Tuple, expressions=expressions) 3503 else: 3504 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3505 3506 if this: 3507 this.add_comments(comments) 3508 3509 self._match_r_paren(expression=this) 3510 return this 3511 3512 return None 3513 3514 def _parse_field( 3515 self, 3516 any_token: bool = False, 3517 
tokens: t.Optional[t.Collection[TokenType]] = None, 3518 anonymous_func: bool = False, 3519 ) -> t.Optional[exp.Expression]: 3520 return ( 3521 self._parse_primary() 3522 or self._parse_function(anonymous=anonymous_func) 3523 or self._parse_id_var(any_token=any_token, tokens=tokens) 3524 ) 3525 3526 def _parse_function( 3527 self, 3528 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3529 anonymous: bool = False, 3530 optional_parens: bool = True, 3531 ) -> t.Optional[exp.Expression]: 3532 if not self._curr: 3533 return None 3534 3535 token_type = self._curr.token_type 3536 this = self._curr.text 3537 upper = this.upper() 3538 3539 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3540 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3541 self._advance() 3542 return parser(self) 3543 3544 if not self._next or self._next.token_type != TokenType.L_PAREN: 3545 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3546 self._advance() 3547 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3548 3549 return None 3550 3551 if token_type not in self.FUNC_TOKENS: 3552 return None 3553 3554 self._advance(2) 3555 3556 parser = self.FUNCTION_PARSERS.get(upper) 3557 if parser and not anonymous: 3558 this = parser(self) 3559 else: 3560 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3561 3562 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3563 this = self.expression(subquery_predicate, this=self._parse_select()) 3564 self._match_r_paren() 3565 return this 3566 3567 if functions is None: 3568 functions = self.FUNCTIONS 3569 3570 function = functions.get(upper) 3571 3572 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3573 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3574 3575 if function and not anonymous: 3576 func = self.validate_expression(function(args), args) 3577 if not self.NORMALIZE_FUNCTIONS: 3578 func.meta["name"] = this 3579 this = func 3580 else: 3581 this = self.expression(exp.Anonymous, this=this, expressions=args) 3582 3583 self._match_r_paren(this) 3584 return self._parse_window(this) 3585 3586 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3587 return self._parse_column_def(self._parse_id_var()) 3588 3589 def _parse_user_defined_function( 3590 self, kind: t.Optional[TokenType] = None 3591 ) -> t.Optional[exp.Expression]: 3592 this = self._parse_id_var() 3593 3594 while self._match(TokenType.DOT): 3595 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3596 3597 if not self._match(TokenType.L_PAREN): 3598 return this 3599 3600 expressions = self._parse_csv(self._parse_function_parameter) 3601 self._match_r_paren() 3602 return self.expression( 3603 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3604 ) 3605 3606 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3607 literal = self._parse_primary() 3608 if literal: 3609 return self.expression(exp.Introducer, this=token.text, expression=literal) 3610 3611 return self.expression(exp.Identifier, this=token.text) 3612 3613 def _parse_session_parameter(self) -> exp.SessionParameter: 3614 kind = None 3615 this = self._parse_id_var() or self._parse_primary() 3616 3617 if this and self._match(TokenType.DOT): 3618 kind = this.name 3619 this = self._parse_var() or self._parse_primary() 3620 3621 return self.expression(exp.SessionParameter, this=this, kind=kind) 3622 3623 def _parse_lambda(self, alias: bool = False) -> 
t.Optional[exp.Expression]: 3624 index = self._index 3625 3626 if self._match(TokenType.L_PAREN): 3627 expressions = t.cast( 3628 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3629 ) 3630 3631 if not self._match(TokenType.R_PAREN): 3632 self._retreat(index) 3633 else: 3634 expressions = [self._parse_id_var()] 3635 3636 if self._match_set(self.LAMBDAS): 3637 return self.LAMBDAS[self._prev.token_type](self, expressions) 3638 3639 self._retreat(index) 3640 3641 this: t.Optional[exp.Expression] 3642 3643 if self._match(TokenType.DISTINCT): 3644 this = self.expression( 3645 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3646 ) 3647 else: 3648 this = self._parse_select_or_expression(alias=alias) 3649 3650 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3651 3652 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3653 index = self._index 3654 3655 if not self.errors: 3656 try: 3657 if self._parse_select(nested=True): 3658 return this 3659 except ParseError: 3660 pass 3661 finally: 3662 self.errors.clear() 3663 self._retreat(index) 3664 3665 if not self._match(TokenType.L_PAREN): 3666 return this 3667 3668 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3669 3670 self._match_r_paren() 3671 return self.expression(exp.Schema, this=this, expressions=args) 3672 3673 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3674 return self._parse_column_def(self._parse_field(any_token=True)) 3675 3676 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3677 # column defs are not really columns, they're identifiers 3678 if isinstance(this, exp.Column): 3679 this = this.this 3680 3681 kind = self._parse_types(schema=True) 3682 3683 if self._match_text_seq("FOR", "ORDINALITY"): 3684 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3685 3686 constraints: t.List[exp.Expression] = [] 3687 3688 if not kind and self._match(TokenType.ALIAS): 3689 constraints.append( 3690 self.expression( 3691 exp.ComputedColumnConstraint, 3692 this=self._parse_conjunction(), 3693 persisted=self._match_text_seq("PERSISTED"), 3694 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3695 ) 3696 ) 3697 3698 while True: 3699 constraint = self._parse_column_constraint() 3700 if not constraint: 3701 break 3702 constraints.append(constraint) 3703 3704 if not kind and not constraints: 3705 return this 3706 3707 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3708 3709 def _parse_auto_increment( 3710 self, 3711 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3712 start = None 3713 increment = None 3714 3715 if self._match(TokenType.L_PAREN, advance=False): 3716 args = self._parse_wrapped_csv(self._parse_bitwise) 3717 start = seq_get(args, 0) 3718 increment = seq_get(args, 1) 3719 elif self._match_text_seq("START"): 3720 start = self._parse_bitwise() 3721 self._match_text_seq("INCREMENT") 3722 increment = self._parse_bitwise() 3723 3724 if start and increment: 3725 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3726 3727 return exp.AutoIncrementColumnConstraint() 3728 3729 def _parse_compress(self) -> exp.CompressColumnConstraint: 3730 if self._match(TokenType.L_PAREN, advance=False): 3731 return self.expression( 3732 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3733 ) 3734 
3735 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3736 3737 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3738 if self._match_text_seq("BY", "DEFAULT"): 3739 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3740 this = self.expression( 3741 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3742 ) 3743 else: 3744 self._match_text_seq("ALWAYS") 3745 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3746 3747 self._match(TokenType.ALIAS) 3748 identity = self._match_text_seq("IDENTITY") 3749 3750 if self._match(TokenType.L_PAREN): 3751 if self._match(TokenType.START_WITH): 3752 this.set("start", self._parse_bitwise()) 3753 if self._match_text_seq("INCREMENT", "BY"): 3754 this.set("increment", self._parse_bitwise()) 3755 if self._match_text_seq("MINVALUE"): 3756 this.set("minvalue", self._parse_bitwise()) 3757 if self._match_text_seq("MAXVALUE"): 3758 this.set("maxvalue", self._parse_bitwise()) 3759 3760 if self._match_text_seq("CYCLE"): 3761 this.set("cycle", True) 3762 elif self._match_text_seq("NO", "CYCLE"): 3763 this.set("cycle", False) 3764 3765 if not identity: 3766 this.set("expression", self._parse_bitwise()) 3767 3768 self._match_r_paren() 3769 3770 return this 3771 3772 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3773 self._match_text_seq("LENGTH") 3774 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3775 3776 def _parse_not_constraint( 3777 self, 3778 ) -> t.Optional[exp.Expression]: 3779 if self._match_text_seq("NULL"): 3780 return self.expression(exp.NotNullColumnConstraint) 3781 if self._match_text_seq("CASESPECIFIC"): 3782 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3783 if self._match_text_seq("FOR", "REPLICATION"): 3784 return self.expression(exp.NotForReplicationColumnConstraint) 3785 return None 3786 3787 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3788 if self._match(TokenType.CONSTRAINT): 3789 this = self._parse_id_var() 3790 else: 3791 this = None 3792 3793 if self._match_texts(self.CONSTRAINT_PARSERS): 3794 return self.expression( 3795 exp.ColumnConstraint, 3796 this=this, 3797 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3798 ) 3799 3800 return this 3801 3802 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3803 if not self._match(TokenType.CONSTRAINT): 3804 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3805 3806 this = self._parse_id_var() 3807 expressions = [] 3808 3809 while True: 3810 constraint = self._parse_unnamed_constraint() or self._parse_function() 3811 if not constraint: 3812 break 3813 expressions.append(constraint) 3814 3815 return self.expression(exp.Constraint, this=this, expressions=expressions) 3816 3817 def _parse_unnamed_constraint( 3818 self, constraints: t.Optional[t.Collection[str]] = None 3819 ) -> t.Optional[exp.Expression]: 3820 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3821 return None 3822 3823 constraint = self._prev.text.upper() 3824 if constraint not in self.CONSTRAINT_PARSERS: 3825 self.raise_error(f"No parser found for schema constraint {constraint}.") 3826 3827 return self.CONSTRAINT_PARSERS[constraint](self) 3828 3829 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3830 self._match_text_seq("KEY") 3831 return self.expression( 3832 exp.UniqueColumnConstraint, 3833 
this=self._parse_schema(self._parse_id_var(any_token=False)), 3834 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3835 ) 3836 3837 def _parse_key_constraint_options(self) -> t.List[str]: 3838 options = [] 3839 while True: 3840 if not self._curr: 3841 break 3842 3843 if self._match(TokenType.ON): 3844 action = None 3845 on = self._advance_any() and self._prev.text 3846 3847 if self._match_text_seq("NO", "ACTION"): 3848 action = "NO ACTION" 3849 elif self._match_text_seq("CASCADE"): 3850 action = "CASCADE" 3851 elif self._match_pair(TokenType.SET, TokenType.NULL): 3852 action = "SET NULL" 3853 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3854 action = "SET DEFAULT" 3855 else: 3856 self.raise_error("Invalid key constraint") 3857 3858 options.append(f"ON {on} {action}") 3859 elif self._match_text_seq("NOT", "ENFORCED"): 3860 options.append("NOT ENFORCED") 3861 elif self._match_text_seq("DEFERRABLE"): 3862 options.append("DEFERRABLE") 3863 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3864 options.append("INITIALLY DEFERRED") 3865 elif self._match_text_seq("NORELY"): 3866 options.append("NORELY") 3867 elif self._match_text_seq("MATCH", "FULL"): 3868 options.append("MATCH FULL") 3869 else: 3870 break 3871 3872 return options 3873 3874 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3875 if match and not self._match(TokenType.REFERENCES): 3876 return None 3877 3878 expressions = None 3879 this = self._parse_table(schema=True) 3880 options = self._parse_key_constraint_options() 3881 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3882 3883 def _parse_foreign_key(self) -> exp.ForeignKey: 3884 expressions = self._parse_wrapped_id_vars() 3885 reference = self._parse_references() 3886 options = {} 3887 3888 while self._match(TokenType.ON): 3889 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3890 self.raise_error("Expected DELETE or UPDATE") 3891 3892 kind = self._prev.text.lower() 3893 3894 if self._match_text_seq("NO", "ACTION"): 3895 action = "NO ACTION" 3896 elif self._match(TokenType.SET): 3897 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3898 action = "SET " + self._prev.text.upper() 3899 else: 3900 self._advance() 3901 action = self._prev.text.upper() 3902 3903 options[kind] = action 3904 3905 return self.expression( 3906 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3907 ) 3908 3909 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 3910 return self._parse_field() 3911 3912 def _parse_primary_key( 3913 self, wrapped_optional: bool = False, in_props: bool = False 3914 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3915 desc = ( 3916 self._match_set((TokenType.ASC, TokenType.DESC)) 3917 and self._prev.token_type == TokenType.DESC 3918 ) 3919 3920 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3921 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3922 3923 expressions = self._parse_wrapped_csv( 3924 self._parse_primary_key_part, optional=wrapped_optional 3925 ) 3926 options = self._parse_key_constraint_options() 3927 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3928 3929 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3930 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3931 return this 3932 3933 bracket_kind = self._prev.token_type 3934 3935 if 
self._match(TokenType.COLON): 3936 expressions: t.List[exp.Expression] = [ 3937 self.expression(exp.Slice, expression=self._parse_conjunction()) 3938 ] 3939 else: 3940 expressions = self._parse_csv( 3941 lambda: self._parse_slice( 3942 self._parse_alias(self._parse_conjunction(), explicit=True) 3943 ) 3944 ) 3945 3946 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3947 if bracket_kind == TokenType.L_BRACE: 3948 this = self.expression(exp.Struct, expressions=expressions) 3949 elif not this or this.name.upper() == "ARRAY": 3950 this = self.expression(exp.Array, expressions=expressions) 3951 else: 3952 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3953 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3954 3955 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3956 self.raise_error("Expected ]") 3957 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3958 self.raise_error("Expected }") 3959 3960 self._add_comments(this) 3961 return self._parse_bracket(this) 3962 3963 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3964 if self._match(TokenType.COLON): 3965 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3966 return this 3967 3968 def _parse_case(self) -> t.Optional[exp.Expression]: 3969 ifs = [] 3970 default = None 3971 3972 comments = self._prev_comments 3973 expression = self._parse_conjunction() 3974 3975 while self._match(TokenType.WHEN): 3976 this = self._parse_conjunction() 3977 self._match(TokenType.THEN) 3978 then = self._parse_conjunction() 3979 ifs.append(self.expression(exp.If, this=this, true=then)) 3980 3981 if self._match(TokenType.ELSE): 3982 default = self._parse_conjunction() 3983 3984 if not self._match(TokenType.END): 3985 self.raise_error("Expected END after CASE", self._prev) 3986 3987 return self._parse_window( 3988 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 3989 ) 3990 3991 def _parse_if(self) -> t.Optional[exp.Expression]: 3992 if self._match(TokenType.L_PAREN): 3993 args = self._parse_csv(self._parse_conjunction) 3994 this = self.validate_expression(exp.If.from_arg_list(args), args) 3995 self._match_r_paren() 3996 else: 3997 index = self._index - 1 3998 condition = self._parse_conjunction() 3999 4000 if not condition: 4001 self._retreat(index) 4002 return None 4003 4004 self._match(TokenType.THEN) 4005 true = self._parse_conjunction() 4006 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4007 self._match(TokenType.END) 4008 this = self.expression(exp.If, this=condition, true=true, false=false) 4009 4010 return self._parse_window(this) 4011 4012 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4013 if not self._match_text_seq("VALUE", "FOR"): 4014 self._retreat(self._index - 1) 4015 return None 4016 4017 return self.expression( 4018 exp.NextValueFor, 4019 this=self._parse_column(), 4020 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4021 ) 4022 4023 def _parse_extract(self) -> exp.Extract: 4024 this = self._parse_function() or self._parse_var() or self._parse_type() 4025 4026 if self._match(TokenType.FROM): 4027 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4028 4029 if not self._match(TokenType.COMMA): 4030 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4031 4032 return 
self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4033 4034 def _parse_any_value(self) -> exp.AnyValue: 4035 this = self._parse_lambda() 4036 is_max = None 4037 having = None 4038 4039 if self._match(TokenType.HAVING): 4040 self._match_texts(("MAX", "MIN")) 4041 is_max = self._prev.text == "MAX" 4042 having = self._parse_column() 4043 4044 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4045 4046 def _parse_cast(self, strict: bool) -> exp.Expression: 4047 this = self._parse_conjunction() 4048 4049 if not self._match(TokenType.ALIAS): 4050 if self._match(TokenType.COMMA): 4051 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4052 4053 self.raise_error("Expected AS after CAST") 4054 4055 fmt = None 4056 to = self._parse_types() 4057 4058 if not to: 4059 self.raise_error("Expected TYPE after CAST") 4060 elif isinstance(to, exp.Identifier): 4061 to = exp.DataType.build(to.name, udt=True) 4062 elif to.this == exp.DataType.Type.CHAR: 4063 if self._match(TokenType.CHARACTER_SET): 4064 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4065 elif self._match(TokenType.FORMAT): 4066 fmt_string = self._parse_string() 4067 fmt = self._parse_at_time_zone(fmt_string) 4068 4069 if to.this in exp.DataType.TEMPORAL_TYPES: 4070 this = self.expression( 4071 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4072 this=this, 4073 format=exp.Literal.string( 4074 format_time( 4075 fmt_string.this if fmt_string else "", 4076 self.FORMAT_MAPPING or self.TIME_MAPPING, 4077 self.FORMAT_TRIE or self.TIME_TRIE, 4078 ) 4079 ), 4080 ) 4081 4082 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4083 this.set("zone", fmt.args["zone"]) 4084 4085 return this 4086 4087 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 4088 4089 def _parse_concat(self) -> t.Optional[exp.Expression]: 4090 args = self._parse_csv(self._parse_conjunction) 4091 if self.CONCAT_NULL_OUTPUTS_STRING: 4092 args = self._ensure_string_if_null(args) 4093 4094 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4095 # we find such a call we replace it with its argument. 4096 if len(args) == 1: 4097 return args[0] 4098 4099 return self.expression( 4100 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4101 ) 4102 4103 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4104 args = self._parse_csv(self._parse_conjunction) 4105 if len(args) < 2: 4106 return self.expression(exp.ConcatWs, expressions=args) 4107 delim, *values = args 4108 if self.CONCAT_NULL_OUTPUTS_STRING: 4109 values = self._ensure_string_if_null(values) 4110 4111 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4112 4113 def _parse_string_agg(self) -> exp.Expression: 4114 if self._match(TokenType.DISTINCT): 4115 args: t.List[t.Optional[exp.Expression]] = [ 4116 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4117 ] 4118 if self._match(TokenType.COMMA): 4119 args.extend(self._parse_csv(self._parse_conjunction)) 4120 else: 4121 args = self._parse_csv(self._parse_conjunction) # type: ignore 4122 4123 index = self._index 4124 if not self._match(TokenType.R_PAREN) and args: 4125 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4126 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 4127 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4128 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4129 4130 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4131 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4132 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4133 if not self._match_text_seq("WITHIN", "GROUP"): 4134 self._retreat(index) 4135 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4136 4137 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4138 order = self._parse_order(this=seq_get(args, 0)) 4139 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4140 4141 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4142 this = self._parse_bitwise() 4143 4144 if self._match(TokenType.USING): 4145 to: t.Optional[exp.Expression] = self.expression( 4146 exp.CharacterSet, this=self._parse_var() 4147 ) 4148 elif self._match(TokenType.COMMA): 4149 to = self._parse_types() 4150 else: 4151 to = None 4152 4153 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4154 4155 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4156 """ 4157 There are generally two variants of the DECODE function: 4158 4159 - DECODE(bin, charset) 4160 - DECODE(expression, search, result [, search, result] ... [, default]) 4161 4162 The second variant will always be parsed into a CASE expression. Note that NULL 4163 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4164 instead of relying on pattern matching. 
4165 """ 4166 args = self._parse_csv(self._parse_conjunction) 4167 4168 if len(args) < 3: 4169 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4170 4171 expression, *expressions = args 4172 if not expression: 4173 return None 4174 4175 ifs = [] 4176 for search, result in zip(expressions[::2], expressions[1::2]): 4177 if not search or not result: 4178 return None 4179 4180 if isinstance(search, exp.Literal): 4181 ifs.append( 4182 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4183 ) 4184 elif isinstance(search, exp.Null): 4185 ifs.append( 4186 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4187 ) 4188 else: 4189 cond = exp.or_( 4190 exp.EQ(this=expression.copy(), expression=search), 4191 exp.and_( 4192 exp.Is(this=expression.copy(), expression=exp.Null()), 4193 exp.Is(this=search.copy(), expression=exp.Null()), 4194 copy=False, 4195 ), 4196 copy=False, 4197 ) 4198 ifs.append(exp.If(this=cond, true=result)) 4199 4200 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4201 4202 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4203 self._match_text_seq("KEY") 4204 key = self._parse_column() 4205 self._match_set((TokenType.COLON, TokenType.COMMA)) 4206 self._match_text_seq("VALUE") 4207 value = self._parse_bitwise() 4208 4209 if not key and not value: 4210 return None 4211 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4212 4213 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4214 if not this or not self._match_text_seq("FORMAT", "JSON"): 4215 return this 4216 4217 return self.expression(exp.FormatJson, this=this) 4218 4219 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4220 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4221 for value in values: 4222 if self._match_text_seq(value, "ON", on): 4223 return f"{value} ON {on}" 4224 4225 return None 4226 4227 def _parse_json_object(self) -> exp.JSONObject: 4228 star = self._parse_star() 4229 expressions = ( 4230 [star] 4231 if star 4232 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4233 ) 4234 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4235 4236 unique_keys = None 4237 if self._match_text_seq("WITH", "UNIQUE"): 4238 unique_keys = True 4239 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4240 unique_keys = False 4241 4242 self._match_text_seq("KEYS") 4243 4244 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4245 self._parse_type() 4246 ) 4247 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4248 4249 return self.expression( 4250 exp.JSONObject, 4251 expressions=expressions, 4252 null_handling=null_handling, 4253 unique_keys=unique_keys, 4254 return_type=return_type, 4255 encoding=encoding, 4256 ) 4257 4258 def _parse_logarithm(self) -> exp.Func: 4259 # Default argument order is base, expression 4260 args = self._parse_csv(self._parse_range) 4261 4262 if len(args) > 1: 4263 if not self.LOG_BASE_FIRST: 4264 args.reverse() 4265 return exp.Log.from_arg_list(args) 4266 4267 return self.expression( 4268 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4269 ) 4270 4271 def _parse_match_against(self) -> exp.MatchAgainst: 4272 expressions = self._parse_csv(self._parse_column) 4273 4274 self._match_text_seq(")", "AGAINST", "(") 4275 4276 this = self._parse_string() 4277 4278 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4279 modifier = "IN NATURAL LANGUAGE MODE" 4280 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4281 modifier = f"{modifier} WITH QUERY EXPANSION" 4282 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4283 modifier = "IN BOOLEAN MODE" 4284 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4285 modifier = "WITH QUERY EXPANSION" 4286 else: 4287 modifier = None 4288 4289 return self.expression( 4290 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4291 ) 4292 4293 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4294 def _parse_open_json(self) -> exp.OpenJSON: 4295 this = self._parse_bitwise() 4296 path = self._match(TokenType.COMMA) and self._parse_string() 4297 4298 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4299 this = self._parse_field(any_token=True) 4300 kind = self._parse_types() 4301 path = self._parse_string() 4302 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4303 4304 return self.expression( 4305 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4306 ) 4307 4308 expressions = None 4309 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4310 self._match_l_paren() 4311 expressions = self._parse_csv(_parse_open_json_column_def) 4312 4313 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4314 4315 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4316 args = self._parse_csv(self._parse_bitwise) 4317 4318 if self._match(TokenType.IN): 4319 return self.expression( 4320 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4321 ) 4322 4323 if haystack_first: 4324 haystack = seq_get(args, 0) 4325 needle = seq_get(args, 1) 4326 else: 4327 needle = seq_get(args, 0) 
4328 haystack = seq_get(args, 1) 4329 4330 return self.expression( 4331 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4332 ) 4333 4334 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4335 args = self._parse_csv(self._parse_table) 4336 return exp.JoinHint(this=func_name.upper(), expressions=args) 4337 4338 def _parse_substring(self) -> exp.Substring: 4339 # Postgres supports the form: substring(string [from int] [for int]) 4340 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4341 4342 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4343 4344 if self._match(TokenType.FROM): 4345 args.append(self._parse_bitwise()) 4346 if self._match(TokenType.FOR): 4347 args.append(self._parse_bitwise()) 4348 4349 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4350 4351 def _parse_trim(self) -> exp.Trim: 4352 # https://www.w3resource.com/sql/character-functions/trim.php 4353 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4354 4355 position = None 4356 collation = None 4357 4358 if self._match_texts(self.TRIM_TYPES): 4359 position = self._prev.text.upper() 4360 4361 expression = self._parse_bitwise() 4362 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4363 this = self._parse_bitwise() 4364 else: 4365 this = expression 4366 expression = None 4367 4368 if self._match(TokenType.COLLATE): 4369 collation = self._parse_bitwise() 4370 4371 return self.expression( 4372 exp.Trim, this=this, position=position, expression=expression, collation=collation 4373 ) 4374 4375 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4376 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4377 4378 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4379 return self._parse_window(self._parse_id_var(), alias=True) 4380 4381 def _parse_respect_or_ignore_nulls( 4382 self, this: t.Optional[exp.Expression] 4383 ) -> t.Optional[exp.Expression]: 4384 if self._match_text_seq("IGNORE", "NULLS"): 4385 return self.expression(exp.IgnoreNulls, this=this) 4386 if self._match_text_seq("RESPECT", "NULLS"): 4387 return self.expression(exp.RespectNulls, this=this) 4388 return this 4389 4390 def _parse_window( 4391 self, this: t.Optional[exp.Expression], alias: bool = False 4392 ) -> t.Optional[exp.Expression]: 4393 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4394 self._match(TokenType.WHERE) 4395 this = self.expression( 4396 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4397 ) 4398 self._match_r_paren() 4399 4400 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4401 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4402 if self._match_text_seq("WITHIN", "GROUP"): 4403 order = self._parse_wrapped(self._parse_order) 4404 this = self.expression(exp.WithinGroup, this=this, expression=order) 4405 4406 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4407 # Some dialects choose to implement and some do not. 4408 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4409 4410 # There is some code above in _parse_lambda that handles 4411 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4412 4413 # The below changes handle 4414 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
4415 4416 # Oracle allows both formats 4417 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4418 # and Snowflake chose to do the same for familiarity 4419 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4420 this = self._parse_respect_or_ignore_nulls(this) 4421 4422 # bigquery select from window x AS (partition by ...) 4423 if alias: 4424 over = None 4425 self._match(TokenType.ALIAS) 4426 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4427 return this 4428 else: 4429 over = self._prev.text.upper() 4430 4431 if not self._match(TokenType.L_PAREN): 4432 return self.expression( 4433 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4434 ) 4435 4436 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4437 4438 first = self._match(TokenType.FIRST) 4439 if self._match_text_seq("LAST"): 4440 first = False 4441 4442 partition, order = self._parse_partition_and_order() 4443 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4444 4445 if kind: 4446 self._match(TokenType.BETWEEN) 4447 start = self._parse_window_spec() 4448 self._match(TokenType.AND) 4449 end = self._parse_window_spec() 4450 4451 spec = self.expression( 4452 exp.WindowSpec, 4453 kind=kind, 4454 start=start["value"], 4455 start_side=start["side"], 4456 end=end["value"], 4457 end_side=end["side"], 4458 ) 4459 else: 4460 spec = None 4461 4462 self._match_r_paren() 4463 4464 window = self.expression( 4465 exp.Window, 4466 this=this, 4467 partition_by=partition, 4468 order=order, 4469 spec=spec, 4470 alias=window_alias, 4471 over=over, 4472 first=first, 4473 ) 4474 4475 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4476 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4477 return self._parse_window(window, alias=alias) 4478 4479 return window 4480 4481 def _parse_partition_and_order( 4482 self, 4483 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4484 return self._parse_partition_by(), self._parse_order() 4485 4486 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4487 self._match(TokenType.BETWEEN) 4488 4489 return { 4490 "value": ( 4491 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4492 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4493 or self._parse_bitwise() 4494 ), 4495 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4496 } 4497 4498 def _parse_alias( 4499 self, this: t.Optional[exp.Expression], explicit: bool = False 4500 ) -> t.Optional[exp.Expression]: 4501 any_token = self._match(TokenType.ALIAS) 4502 4503 if explicit and not any_token: 4504 return this 4505 4506 if self._match(TokenType.L_PAREN): 4507 aliases = self.expression( 4508 exp.Aliases, 4509 this=this, 4510 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4511 ) 4512 self._match_r_paren(aliases) 4513 return aliases 4514 4515 alias = self._parse_id_var(any_token) 4516 4517 if alias: 4518 return self.expression(exp.Alias, this=this, alias=alias) 4519 4520 return this 4521 4522 def _parse_id_var( 4523 self, 4524 any_token: bool = True, 4525 tokens: t.Optional[t.Collection[TokenType]] = None, 4526 ) -> t.Optional[exp.Expression]: 4527 identifier = self._parse_identifier() 4528 4529 if identifier: 4530 return identifier 4531 4532 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4533 quoted = self._prev.token_type == TokenType.STRING 4534 return exp.Identifier(this=self._prev.text, quoted=quoted) 4535 4536 return None 4537 4538 def _parse_string(self) -> t.Optional[exp.Expression]: 4539 if self._match(TokenType.STRING): 4540 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4541 return self._parse_placeholder() 4542 4543 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4544 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4545 4546 def _parse_number(self) -> t.Optional[exp.Expression]: 4547 if self._match(TokenType.NUMBER): 4548 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4549 return self._parse_placeholder() 4550 4551 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4552 if self._match(TokenType.IDENTIFIER): 4553 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4554 return self._parse_placeholder() 4555 4556 def _parse_var( 4557 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4558 ) -> t.Optional[exp.Expression]: 4559 if ( 4560 (any_token and self._advance_any()) 4561 or self._match(TokenType.VAR) 4562 or (self._match_set(tokens) if tokens else False) 4563 ): 4564 return self.expression(exp.Var, this=self._prev.text) 4565 return self._parse_placeholder() 4566 4567 def _advance_any(self) -> t.Optional[Token]: 4568 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4569 self._advance() 4570 return self._prev 4571 return None 4572 4573 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4574 return self._parse_var() or self._parse_string() 4575 4576 def _parse_null(self) -> t.Optional[exp.Expression]: 4577 if self._match(TokenType.NULL): 4578 return 
self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4579 return self._parse_placeholder() 4580 4581 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4582 if self._match(TokenType.TRUE): 4583 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4584 if self._match(TokenType.FALSE): 4585 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4586 return self._parse_placeholder() 4587 4588 def _parse_star(self) -> t.Optional[exp.Expression]: 4589 if self._match(TokenType.STAR): 4590 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4591 return self._parse_placeholder() 4592 4593 def _parse_parameter(self) -> exp.Parameter: 4594 wrapped = self._match(TokenType.L_BRACE) 4595 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4596 self._match(TokenType.R_BRACE) 4597 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4598 4599 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4600 if self._match_set(self.PLACEHOLDER_PARSERS): 4601 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4602 if placeholder: 4603 return placeholder 4604 self._advance(-1) 4605 return None 4606 4607 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4608 if not self._match(TokenType.EXCEPT): 4609 return None 4610 if self._match(TokenType.L_PAREN, advance=False): 4611 return self._parse_wrapped_csv(self._parse_column) 4612 return self._parse_csv(self._parse_column) 4613 4614 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4615 if not self._match(TokenType.REPLACE): 4616 return None 4617 if self._match(TokenType.L_PAREN, advance=False): 4618 return self._parse_wrapped_csv(self._parse_expression) 4619 return self._parse_expressions() 4620 4621 def _parse_csv( 4622 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4623 ) -> t.List[exp.Expression]: 4624 parse_result = parse_method() 4625 items = [parse_result] if parse_result is not None else [] 4626 4627 while self._match(sep): 4628 self._add_comments(parse_result) 4629 parse_result = parse_method() 4630 if parse_result is not None: 4631 items.append(parse_result) 4632 4633 return items 4634 4635 def _parse_tokens( 4636 self, parse_method: t.Callable, expressions: t.Dict 4637 ) -> t.Optional[exp.Expression]: 4638 this = parse_method() 4639 4640 while self._match_set(expressions): 4641 this = self.expression( 4642 expressions[self._prev.token_type], 4643 this=this, 4644 comments=self._prev_comments, 4645 expression=parse_method(), 4646 ) 4647 4648 return this 4649 4650 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4651 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4652 4653 def _parse_wrapped_csv( 4654 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4655 ) -> t.List[exp.Expression]: 4656 return self._parse_wrapped( 4657 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4658 ) 4659 4660 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4661 wrapped = self._match(TokenType.L_PAREN) 4662 if not wrapped and not optional: 4663 self.raise_error("Expecting (") 4664 parse_result = parse_method() 4665 if wrapped: 4666 self._match_r_paren() 4667 return parse_result 4668 4669 def _parse_expressions(self) -> t.List[exp.Expression]: 4670 return self._parse_csv(self._parse_expression) 4671 4672 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4673 
return self._parse_select() or self._parse_set_operations( 4674 self._parse_expression() if alias else self._parse_conjunction() 4675 ) 4676 4677 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4678 return self._parse_query_modifiers( 4679 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4680 ) 4681 4682 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4683 this = None 4684 if self._match_texts(self.TRANSACTION_KIND): 4685 this = self._prev.text 4686 4687 self._match_texts({"TRANSACTION", "WORK"}) 4688 4689 modes = [] 4690 while True: 4691 mode = [] 4692 while self._match(TokenType.VAR): 4693 mode.append(self._prev.text) 4694 4695 if mode: 4696 modes.append(" ".join(mode)) 4697 if not self._match(TokenType.COMMA): 4698 break 4699 4700 return self.expression(exp.Transaction, this=this, modes=modes) 4701 4702 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4703 chain = None 4704 savepoint = None 4705 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4706 4707 self._match_texts({"TRANSACTION", "WORK"}) 4708 4709 if self._match_text_seq("TO"): 4710 self._match_text_seq("SAVEPOINT") 4711 savepoint = self._parse_id_var() 4712 4713 if self._match(TokenType.AND): 4714 chain = not self._match_text_seq("NO") 4715 self._match_text_seq("CHAIN") 4716 4717 if is_rollback: 4718 return self.expression(exp.Rollback, savepoint=savepoint) 4719 4720 return self.expression(exp.Commit, chain=chain) 4721 4722 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4723 if not self._match_text_seq("ADD"): 4724 return None 4725 4726 self._match(TokenType.COLUMN) 4727 exists_column = self._parse_exists(not_=True) 4728 expression = self._parse_field_def() 4729 4730 if expression: 4731 expression.set("exists", exists_column) 4732 4733 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4734 if self._match_texts(("FIRST", "AFTER")): 4735 position = self._prev.text 4736 column_position = self.expression( 4737 exp.ColumnPosition, this=self._parse_column(), position=position 4738 ) 4739 expression.set("position", column_position) 4740 4741 return expression 4742 4743 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4744 drop = self._match(TokenType.DROP) and self._parse_drop() 4745 if drop and not isinstance(drop, exp.Command): 4746 drop.set("kind", drop.args.get("kind", "COLUMN")) 4747 return drop 4748 4749 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4750 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4751 return self.expression( 4752 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4753 ) 4754 4755 def _parse_add_constraint(self) -> exp.AddConstraint: 4756 this = None 4757 kind = self._prev.token_type 4758 4759 if kind == TokenType.CONSTRAINT: 4760 this = self._parse_id_var() 4761 4762 if self._match_text_seq("CHECK"): 4763 expression = self._parse_wrapped(self._parse_conjunction) 4764 enforced = self._match_text_seq("ENFORCED") 4765 4766 return self.expression( 4767 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4768 ) 4769 4770 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4771 expression = self._parse_foreign_key() 4772 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4773 expression = self._parse_primary_key() 4774 else: 4775 expression = None 4776 4777 return 
self.expression(exp.AddConstraint, this=this, expression=expression) 4778 4779 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4780 index = self._index - 1 4781 4782 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4783 return self._parse_csv(self._parse_add_constraint) 4784 4785 self._retreat(index) 4786 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4787 return self._parse_csv(self._parse_field_def) 4788 4789 return self._parse_csv(self._parse_add_column) 4790 4791 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4792 self._match(TokenType.COLUMN) 4793 column = self._parse_field(any_token=True) 4794 4795 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4796 return self.expression(exp.AlterColumn, this=column, drop=True) 4797 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4798 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4799 4800 self._match_text_seq("SET", "DATA") 4801 return self.expression( 4802 exp.AlterColumn, 4803 this=column, 4804 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4805 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4806 using=self._match(TokenType.USING) and self._parse_conjunction(), 4807 ) 4808 4809 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4810 index = self._index - 1 4811 4812 partition_exists = self._parse_exists() 4813 if self._match(TokenType.PARTITION, advance=False): 4814 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4815 4816 self._retreat(index) 4817 return self._parse_csv(self._parse_drop_column) 4818 4819 def _parse_alter_table_rename(self) -> exp.RenameTable: 4820 self._match_text_seq("TO") 4821 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4822 4823 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4824 start = self._prev 4825 4826 if not self._match(TokenType.TABLE): 4827 return self._parse_as_command(start) 4828 4829 exists = self._parse_exists() 4830 only = self._match_text_seq("ONLY") 4831 this = self._parse_table(schema=True) 4832 4833 if self._next: 4834 self._advance() 4835 4836 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4837 if parser: 4838 actions = ensure_list(parser(self)) 4839 4840 if not self._curr: 4841 return self.expression( 4842 exp.AlterTable, 4843 this=this, 4844 exists=exists, 4845 actions=actions, 4846 only=only, 4847 ) 4848 4849 return self._parse_as_command(start) 4850 4851 def _parse_merge(self) -> exp.Merge: 4852 self._match(TokenType.INTO) 4853 target = self._parse_table() 4854 4855 if target and self._match(TokenType.ALIAS, advance=False): 4856 target.set("alias", self._parse_table_alias()) 4857 4858 self._match(TokenType.USING) 4859 using = self._parse_table() 4860 4861 self._match(TokenType.ON) 4862 on = self._parse_conjunction() 4863 4864 whens = [] 4865 while self._match(TokenType.WHEN): 4866 matched = not self._match(TokenType.NOT) 4867 self._match_text_seq("MATCHED") 4868 source = ( 4869 False 4870 if self._match_text_seq("BY", "TARGET") 4871 else self._match_text_seq("BY", "SOURCE") 4872 ) 4873 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4874 4875 self._match(TokenType.THEN) 4876 4877 if self._match(TokenType.INSERT): 4878 _this = self._parse_star() 4879 if _this: 4880 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4881 else: 4882 then = self.expression( 4883 exp.Insert, 4884 
this=self._parse_value(), 4885 expression=self._match(TokenType.VALUES) and self._parse_value(), 4886 ) 4887 elif self._match(TokenType.UPDATE): 4888 expressions = self._parse_star() 4889 if expressions: 4890 then = self.expression(exp.Update, expressions=expressions) 4891 else: 4892 then = self.expression( 4893 exp.Update, 4894 expressions=self._match(TokenType.SET) 4895 and self._parse_csv(self._parse_equality), 4896 ) 4897 elif self._match(TokenType.DELETE): 4898 then = self.expression(exp.Var, this=self._prev.text) 4899 else: 4900 then = None 4901 4902 whens.append( 4903 self.expression( 4904 exp.When, 4905 matched=matched, 4906 source=source, 4907 condition=condition, 4908 then=then, 4909 ) 4910 ) 4911 4912 return self.expression( 4913 exp.Merge, 4914 this=target, 4915 using=using, 4916 on=on, 4917 expressions=whens, 4918 ) 4919 4920 def _parse_show(self) -> t.Optional[exp.Expression]: 4921 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4922 if parser: 4923 return parser(self) 4924 return self._parse_as_command(self._prev) 4925 4926 def _parse_set_item_assignment( 4927 self, kind: t.Optional[str] = None 4928 ) -> t.Optional[exp.Expression]: 4929 index = self._index 4930 4931 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4932 return self._parse_set_transaction(global_=kind == "GLOBAL") 4933 4934 left = self._parse_primary() or self._parse_id_var() 4935 4936 if not self._match_texts(("=", "TO")): 4937 self._retreat(index) 4938 return None 4939 4940 right = self._parse_statement() or self._parse_id_var() 4941 this = self.expression(exp.EQ, this=left, expression=right) 4942 4943 return self.expression(exp.SetItem, this=this, kind=kind) 4944 4945 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4946 self._match_text_seq("TRANSACTION") 4947 characteristics = self._parse_csv( 4948 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4949 ) 4950 return self.expression( 4951 exp.SetItem, 4952 expressions=characteristics, 4953 kind="TRANSACTION", 4954 **{"global": global_}, # type: ignore 4955 ) 4956 4957 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4958 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4959 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4960 4961 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4962 index = self._index 4963 set_ = self.expression( 4964 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4965 ) 4966 4967 if self._curr: 4968 self._retreat(index) 4969 return self._parse_as_command(self._prev) 4970 4971 return set_ 4972 4973 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4974 for option in options: 4975 if self._match_text_seq(*option.split(" ")): 4976 return exp.var(option) 4977 return None 4978 4979 def _parse_as_command(self, start: Token) -> exp.Command: 4980 while self._curr: 4981 self._advance() 4982 text = self._find_sql(start, self._prev) 4983 size = len(start.text) 4984 return exp.Command(this=text[:size], expression=text[size:]) 4985 4986 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4987 settings = [] 4988 4989 self._match_l_paren() 4990 kind = self._parse_id_var() 4991 4992 if self._match(TokenType.L_PAREN): 4993 while True: 4994 key = self._parse_id_var() 4995 value = self._parse_primary() 4996 4997 if not key and value is None: 4998 break 4999 
settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5000 self._match(TokenType.R_PAREN) 5001 5002 self._match_r_paren() 5003 5004 return self.expression( 5005 exp.DictProperty, 5006 this=this, 5007 kind=kind.this if kind else None, 5008 settings=settings, 5009 ) 5010 5011 def _parse_dict_range(self, this: str) -> exp.DictRange: 5012 self._match_l_paren() 5013 has_min = self._match_text_seq("MIN") 5014 if has_min: 5015 min = self._parse_var() or self._parse_primary() 5016 self._match_text_seq("MAX") 5017 max = self._parse_var() or self._parse_primary() 5018 else: 5019 max = self._parse_var() or self._parse_primary() 5020 min = exp.Literal.number(0) 5021 self._match_r_paren() 5022 return self.expression(exp.DictRange, this=this, min=min, max=max) 5023 5024 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5025 index = self._index 5026 expression = self._parse_column() 5027 if not self._match(TokenType.IN): 5028 self._retreat(index - 1) 5029 return None 5030 iterator = self._parse_column() 5031 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5032 return self.expression( 5033 exp.Comprehension, 5034 this=this, 5035 expression=expression, 5036 iterator=iterator, 5037 condition=condition, 5038 ) 5039 5040 def _find_parser( 5041 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5042 ) -> t.Optional[t.Callable]: 5043 if not self._curr: 5044 return None 5045 5046 index = self._index 5047 this = [] 5048 while True: 5049 # The current token might be multiple words 5050 curr = self._curr.text.upper() 5051 key = curr.split(" ") 5052 this.append(curr) 5053 5054 self._advance() 5055 result, trie = in_trie(trie, key) 5056 if result == TrieResult.FAILED: 5057 break 5058 5059 if result == TrieResult.EXISTS: 5060 subparser = parsers[" ".join(this)] 5061 return subparser 5062 5063 self._retreat(index) 5064 return None 5065 5066 def _match(self, token_type, advance=True, expression=None): 5067 if not self._curr: 5068 return None 5069 5070 if self._curr.token_type == token_type: 5071 if advance: 5072 self._advance() 5073 self._add_comments(expression) 5074 return True 5075 5076 return None 5077 5078 def _match_set(self, types, advance=True): 5079 if not self._curr: 5080 return None 5081 5082 if self._curr.token_type in types: 5083 if advance: 5084 self._advance() 5085 return True 5086 5087 return None 5088 5089 def _match_pair(self, token_type_a, token_type_b, advance=True): 5090 if not self._curr or not self._next: 5091 return None 5092 5093 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5094 if advance: 5095 self._advance(2) 5096 return True 5097 5098 return None 5099 5100 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5101 if not self._match(TokenType.L_PAREN, expression=expression): 5102 self.raise_error("Expecting (") 5103 5104 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5105 if not self._match(TokenType.R_PAREN, expression=expression): 5106 self.raise_error("Expecting )") 5107 5108 def _match_texts(self, texts, advance=True): 5109 if self._curr and self._curr.text.upper() in texts: 5110 if advance: 5111 self._advance() 5112 return True 5113 return False 5114 5115 def _match_text_seq(self, *texts, advance=True): 5116 index = self._index 5117 for text in texts: 5118 if self._curr and self._curr.text.upper() == text: 5119 self._advance() 5120 else: 5121 self._retreat(index) 5122 return False 5123 5124 if not 
advance: 5125 self._retreat(index) 5126 5127 return True 5128 5129 @t.overload 5130 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5131 ... 5132 5133 @t.overload 5134 def _replace_columns_with_dots( 5135 self, this: t.Optional[exp.Expression] 5136 ) -> t.Optional[exp.Expression]: 5137 ... 5138 5139 def _replace_columns_with_dots(self, this): 5140 if isinstance(this, exp.Dot): 5141 exp.replace_children(this, self._replace_columns_with_dots) 5142 elif isinstance(this, exp.Column): 5143 exp.replace_children(this, self._replace_columns_with_dots) 5144 table = this.args.get("table") 5145 this = ( 5146 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5147 ) 5148 5149 return this 5150 5151 def _replace_lambda( 5152 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5153 ) -> t.Optional[exp.Expression]: 5154 if not node: 5155 return node 5156 5157 for column in node.find_all(exp.Column): 5158 if column.parts[0].name in lambda_variables: 5159 dot_or_id = column.to_dot() if column.table else column.this 5160 parent = column.parent 5161 5162 while isinstance(parent, exp.Dot): 5163 if not isinstance(parent.parent, exp.Dot): 5164 parent.replace(dot_or_id) 5165 break 5166 parent = parent.parent 5167 else: 5168 if column is node: 5169 node = dot_or_id 5170 else: 5171 column.replace(dot_or_id) 5172 return node 5173 5174 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5175 return [ 5176 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5177 for value in values 5178 if value 5179 ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
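A minimal construction sketch based on the arguments above; the dialect-agnostic base Parser is used directly here, whereas in normal use a dialect subclass is obtained through sqlglot.Dialect:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    # Collect up to five errors and raise them together once parsing finishes.
    parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)
    tokens = Tokenizer().tokenize("SELECT a FROM t")
    trees = parser.parse(tokens, sql="SELECT a FROM t")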
908 def __init__( 909 self, 910 error_level: t.Optional[ErrorLevel] = None, 911 error_message_context: int = 100, 912 max_errors: int = 3, 913 ): 914 self.error_level = error_level or ErrorLevel.IMMEDIATE 915 self.error_message_context = error_message_context 916 self.max_errors = max_errors 917 self._tokenizer = self.TOKENIZER_CLASS() 918 self.reset()
930 def parse( 931 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 932 ) -> t.List[t.Optional[exp.Expression]]: 933 """ 934 Parses a list of tokens and returns a list of syntax trees, one tree 935 per parsed SQL statement. 936 937 Args: 938 raw_tokens: The list of tokens. 939 sql: The original SQL string, used to produce helpful debug messages. 940 941 Returns: 942 The list of the produced syntax trees. 943 """ 944 return self._parse( 945 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 946 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
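A short sketch of the one-tree-per-statement contract:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    assert len(trees) == 2  # one syntax tree per SQL statement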
948 def parse_into( 949 self, 950 expression_types: exp.IntoType, 951 raw_tokens: t.List[Token], 952 sql: t.Optional[str] = None, 953 ) -> t.List[t.Optional[exp.Expression]]: 954 """ 955 Parses a list of tokens into a given Expression type. If a collection of Expression 956 types is given instead, this method will try to parse the token list into each one 957 of them, stopping at the first for which the parsing succeeds. 958 959 Args: 960 expression_types: The expression type(s) to try and parse the token list into. 961 raw_tokens: The list of tokens. 962 sql: The original SQL string, used to produce helpful debug messages. 963 964 Returns: 965 The target Expression. 966 """ 967 errors = [] 968 for expression_type in ensure_list(expression_types): 969 parser = self.EXPRESSION_PARSERS.get(expression_type) 970 if not parser: 971 raise TypeError(f"No parser registered for {expression_type}") 972 973 try: 974 return self._parse(parser, raw_tokens, sql) 975 except ParseError as e: 976 e.errors[0]["into_expression"] = expression_type 977 errors.append(e) 978 979 raise ParseError( 980 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 981 errors=merge_errors(errors), 982 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
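A sketch of the fallback behavior, assuming exp.Table and exp.Column are both registered in EXPRESSION_PARSERS (they are in the base parser at the time of writing):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    # Try Table first, then Column; the first type that parses successfully wins.
    tokens = Tokenizer().tokenize("a.b.c")
    node = Parser().parse_into((exp.Table, exp.Column), tokens, sql="a.b.c")[0]
    assert isinstance(node, exp.Table)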
1019 def check_errors(self) -> None: 1020 """Logs or raises any found errors, depending on the chosen error level setting.""" 1021 if self.error_level == ErrorLevel.WARN: 1022 for error in self.errors: 1023 logger.error(str(error)) 1024 elif self.error_level == ErrorLevel.RAISE and self.errors: 1025 raise ParseError( 1026 concat_messages(self.errors, self.max_errors), 1027 errors=merge_errors(self.errors), 1028 )
Logs or raises any found errors, depending on the chosen error level setting.
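Under ErrorLevel.WARN, errors are recorded and logged rather than raised; a minimal sketch (parsing itself already invokes check_errors at the end, so the explicit call below is only illustrative):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize("SELECT * FROM"))  # missing table name
    parser.check_errors()      # logs each recorded error instead of raising
    print(len(parser.errors))  # the recorded errors remain available for inspection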
1030 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1031 """ 1032 Appends an error to the list of recorded errors or raises it, depending on the chosen 1033 error level setting. 1034 """ 1035 token = token or self._curr or self._prev or Token.string("") 1036 start = token.start 1037 end = token.end + 1 1038 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1039 highlight = self.sql[start:end] 1040 end_context = self.sql[end : end + self.error_message_context] 1041 1042 error = ParseError.new( 1043 f"{message}. Line {token.line}, Col: {token.col}.\n" 1044 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1045 description=message, 1046 line=token.line, 1047 col=token.col, 1048 start_context=start_context, 1049 highlight=highlight, 1050 end_context=end_context, 1051 ) 1052 1053 if self.error_level == ErrorLevel.IMMEDIATE: 1054 raise error 1055 1056 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
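The structured fields set here can be read back off a raised ParseError; a minimal sketch under the default IMMEDIATE error level:

    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    try:
        Parser().parse(Tokenizer().tokenize("SELECT * FROM"))
    except ParseError as e:
        first = e.errors[0]  # a dict carrying the fields passed to ParseError.new
        print(first["line"], first["col"], first["description"])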
1058 def expression( 1059 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1060 ) -> E: 1061 """ 1062 Creates a new, validated Expression. 1063 1064 Args: 1065 exp_class: The expression class to instantiate. 1066 comments: An optional list of comments to attach to the expression. 1067 kwargs: The arguments to set for the expression along with their respective values. 1068 1069 Returns: 1070 The target expression. 1071 """ 1072 instance = exp_class(**kwargs) 1073 instance.add_comments(comments) if comments else self._add_comments(instance) 1074 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
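A sketch of how a parser hook typically uses this method; _parse_my_clause is a hypothetical name, for illustration only:

    from sqlglot import exp
    from sqlglot.parser import Parser

    class MyParser(Parser):
        def _parse_my_clause(self):
            # expression() attaches any pending comments to the new node and
            # validates its mandatory arguments in one step.
            this = self._parse_conjunction()
            return self.expression(exp.Paren, this=this)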
1081 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1082 """ 1083 Validates an Expression, making sure that all its mandatory arguments are set. 1084 1085 Args: 1086 expression: The expression to validate. 1087 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1088 1089 Returns: 1090 The validated expression. 1091 """ 1092 if self.error_level != ErrorLevel.IGNORE: 1093 for error_message in expression.error_messages(args): 1094 self.raise_error(error_message) 1095 1096 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
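A sketch of the validation behavior, assuming exp.Cast declares its `to` argument as mandatory (it does at the time of writing):

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    parser = Parser(error_level=ErrorLevel.IGNORE)
    node = parser.validate_expression(exp.Cast(this=exp.column("x")))  # "to" is missing
    # Under IGNORE the node is returned unchanged; under any other level the
    # missing mandatory argument would be reported through raise_error.
    assert isinstance(node, exp.Cast)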