sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
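
    # Illustrative usage sketch (not part of the original source): the parser is
    # normally driven through a dialect, but it can be exercised directly with
    # the base tokenizer.
    #
    #   tokens = Tokenizer().tokenize("SELECT a FROM t WHERE b > 1")
    #   expressions = Parser().parse(tokens, "SELECT a FROM t WHERE b > 1")
    #   # -> one syntax tree (or None) per parsed statement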

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }
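
    # These token sets are plain class attributes so dialect parsers can extend
    # or shrink them declaratively. A hypothetical sketch (illustrative only,
    # not part of this module):
    #
    #   class MyDialectParser(Parser):
    #       # e.g. treat ClickHouse-style DATETIME64 as an unsupported type
    #       TYPE_TOKENS = Parser.TYPE_TOKENS - {TokenType.DATETIME64}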

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
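
    # TABLE_ALIAS_TOKENS removes the join-related keywords from ID_VAR_TOKENS on
    # purpose: in "SELECT * FROM t LEFT JOIN u", LEFT must begin a join clause,
    # so it cannot double as an alias for t, while most other keywords can.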

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
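
    # COLUMN_OPERATORS drives postfix column syntax. Illustrative mapping, with
    # the default STRICT_CAST = True:
    #
    #   x::INT       -> exp.Cast(this=x, to=INT)            (TokenType.DCOLON)
    #   x -> '$.a'   -> exp.JSONExtract(this=x, ...)        (TokenType.ARROW)
    #   x ->> '$.a'  -> exp.JSONExtractScalar(this=x, ...)  (TokenType.DARROW)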

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
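
    # Statements whose leading token has no entry here may still be recognized
    # as commands (Tokenizer.COMMANDS) in _parse_statement and preserved
    # verbatim as exp.Command nodes, roughly:
    #
    #   sqlglot.parse_one("EXPLAIN SELECT 1")
    #   # -> Command(this='EXPLAIN', expression=' SELECT 1')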

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }
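
    # FUNCTION_PARSERS handles functions whose argument syntax is not a plain
    # comma-separated list (e.g. CAST(x AS INT), EXTRACT(year FROM d)). Dialects
    # can hook in the same way; a hypothetical sketch (not part of this module):
    #
    #   class MyParser(Parser):
    #       FUNCTION_PARSERS = {
    #           **Parser.FUNCTION_PARSERS,
    #           "MY_FUNC": lambda self: self._parse_my_func(),  # hypothetical helper
    #       }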

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False
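
    # Query modifiers parsed via QUERY_MODIFIER_PARSERS above end up as args on
    # the enclosing query node. Illustrative:
    #
    #   tree = sqlglot.parse_one("SELECT a FROM t WHERE a > 1 LIMIT 5")
    #   isinstance(tree.args["where"], exp.Where)  # True
    #   isinstance(tree.args["limit"], exp.Limit)  # True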

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments.
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
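
    # parse_into() (below) targets a specific node type registered in
    # EXPRESSION_PARSERS; an illustrative sketch:
    #
    #   sql = "x > 1 AND y < 2"
    #   tokens = Tokenizer().tokenize(sql)
    #   condition = Parser().parse_into(exp.Condition, tokens, sql)[0]
    #   # -> exp.And(this=exp.GT(...), expression=exp.LT(...))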

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
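
    # Error-level behavior, illustrated (not part of the original source):
    # IMMEDIATE (the default) raises inside raise_error, while RAISE defers and
    # raises once per statement from check_errors with the errors merged.
    #
    #   parser = Parser(error_level=ErrorLevel.RAISE)
    #   try:
    #       parser.parse(Tokenizer().tokenize("SELECT )"), "SELECT )")
    #   except ParseError as e:
    #       e.errors  # structured dicts: description, line, col, highlight, ...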

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)
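
    # _advance/_retreat support the backtracking pattern used throughout the
    # parser: record the index, attempt a parse, and rewind if it fails.
    # Illustrative shape (mirroring _parse_property further below):
    #
    #   index = self._index
    #   key = self._parse_column()
    #   if not self._match(TokenType.EQ):
    #       self._retreat(index)  # not a property assignment; undo consumption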

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
"NOT LOCAL"), 1404 "after": self._match_text_seq("AFTER"), 1405 "minimum": self._match_texts(("MIN", "MINIMUM")), 1406 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1407 } 1408 1409 if self._match_texts(self.PROPERTY_PARSERS): 1410 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1411 try: 1412 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1413 except TypeError: 1414 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1415 1416 return None 1417 1418 def _parse_property(self) -> t.Optional[exp.Expression]: 1419 if self._match_texts(self.PROPERTY_PARSERS): 1420 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1421 1422 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1423 return self._parse_character_set(default=True) 1424 1425 if self._match_text_seq("COMPOUND", "SORTKEY"): 1426 return self._parse_sortkey(compound=True) 1427 1428 if self._match_text_seq("SQL", "SECURITY"): 1429 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1430 1431 index = self._index 1432 key = self._parse_column() 1433 1434 if not self._match(TokenType.EQ): 1435 self._retreat(index) 1436 return None 1437 1438 return self.expression( 1439 exp.Property, 1440 this=key.to_dot() if isinstance(key, exp.Column) else key, 1441 value=self._parse_column() or self._parse_var(any_token=True), 1442 ) 1443 1444 def _parse_stored(self) -> exp.FileFormatProperty: 1445 self._match(TokenType.ALIAS) 1446 1447 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1448 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1449 1450 return self.expression( 1451 exp.FileFormatProperty, 1452 this=self.expression( 1453 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1454 ) 1455 if input_format or output_format 1456 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1457 ) 1458 1459 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1460 self._match(TokenType.EQ) 1461 self._match(TokenType.ALIAS) 1462 return self.expression(exp_class, this=self._parse_field()) 1463 1464 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1465 properties = [] 1466 while True: 1467 if before: 1468 prop = self._parse_property_before() 1469 else: 1470 prop = self._parse_property() 1471 1472 if not prop: 1473 break 1474 for p in ensure_list(prop): 1475 properties.append(p) 1476 1477 if properties: 1478 return self.expression(exp.Properties, expressions=properties) 1479 1480 return None 1481 1482 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1483 return self.expression( 1484 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1485 ) 1486 1487 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1488 if self._index >= 2: 1489 pre_volatile_token = self._tokens[self._index - 2] 1490 else: 1491 pre_volatile_token = None 1492 1493 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1494 return exp.VolatileProperty() 1495 1496 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1497 1498 def _parse_with_property( 1499 self, 1500 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1501 if self._match(TokenType.L_PAREN, advance=False): 1502 return self._parse_wrapped_csv(self._parse_property) 1503 1504 if self._match_text_seq("JOURNAL"): 1505 return 

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
exp.NoPrimaryIndexProperty() 1738 return None 1739 1740 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1741 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1742 return exp.OnCommitProperty() 1743 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1744 return exp.OnCommitProperty(delete=True) 1745 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1746 1747 def _parse_distkey(self) -> exp.DistKeyProperty: 1748 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1749 1750 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1751 table = self._parse_table(schema=True) 1752 1753 options = [] 1754 while self._match_texts(("INCLUDING", "EXCLUDING")): 1755 this = self._prev.text.upper() 1756 1757 id_var = self._parse_id_var() 1758 if not id_var: 1759 return None 1760 1761 options.append( 1762 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1763 ) 1764 1765 return self.expression(exp.LikeProperty, this=table, expressions=options) 1766 1767 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1768 return self.expression( 1769 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1770 ) 1771 1772 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1773 self._match(TokenType.EQ) 1774 return self.expression( 1775 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1776 ) 1777 1778 def _parse_returns(self) -> exp.ReturnsProperty: 1779 value: t.Optional[exp.Expression] 1780 is_table = self._match(TokenType.TABLE) 1781 1782 if is_table: 1783 if self._match(TokenType.LT): 1784 value = self.expression( 1785 exp.Schema, 1786 this="TABLE", 1787 expressions=self._parse_csv(self._parse_struct_types), 1788 ) 1789 if not self._match(TokenType.GT): 1790 self.raise_error("Expecting >") 1791 else: 1792 value = self._parse_schema(exp.var("TABLE")) 1793 else: 1794 value = self._parse_types() 1795 1796 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1797 1798 def _parse_describe(self) -> exp.Describe: 1799 kind = self._match_set(self.CREATABLES) and self._prev.text 1800 this = self._parse_table(schema=True) 1801 properties = self._parse_properties() 1802 expressions = properties.expressions if properties else None 1803 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1804 1805 def _parse_insert(self) -> exp.Insert: 1806 comments = ensure_list(self._prev_comments) 1807 overwrite = self._match(TokenType.OVERWRITE) 1808 ignore = self._match(TokenType.IGNORE) 1809 local = self._match_text_seq("LOCAL") 1810 alternative = None 1811 1812 if self._match_text_seq("DIRECTORY"): 1813 this: t.Optional[exp.Expression] = self.expression( 1814 exp.Directory, 1815 this=self._parse_var_or_string(), 1816 local=local, 1817 row_format=self._parse_row_format(match_row=True), 1818 ) 1819 else: 1820 if self._match(TokenType.OR): 1821 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1822 1823 self._match(TokenType.INTO) 1824 comments += ensure_list(self._prev_comments) 1825 self._match(TokenType.TABLE) 1826 this = self._parse_table(schema=True) 1827 1828 returning = self._parse_returning() 1829 1830 return self.expression( 1831 exp.Insert, 1832 comments=comments, 1833 this=this, 1834 by_name=self._match_text_seq("BY", "NAME"), 1835 exists=self._parse_exists(), 1836 partition=self._parse_partition(), 1837 
where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1838 and self._parse_conjunction(), 1839 expression=self._parse_ddl_select(), 1840 conflict=self._parse_on_conflict(), 1841 returning=returning or self._parse_returning(), 1842 overwrite=overwrite, 1843 alternative=alternative, 1844 ignore=ignore, 1845 ) 1846 1847 def _parse_kill(self) -> exp.Kill: 1848 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1849 1850 return self.expression( 1851 exp.Kill, 1852 this=self._parse_primary(), 1853 kind=kind, 1854 ) 1855 1856 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1857 conflict = self._match_text_seq("ON", "CONFLICT") 1858 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1859 1860 if not conflict and not duplicate: 1861 return None 1862 1863 nothing = None 1864 expressions = None 1865 key = None 1866 constraint = None 1867 1868 if conflict: 1869 if self._match_text_seq("ON", "CONSTRAINT"): 1870 constraint = self._parse_id_var() 1871 else: 1872 key = self._parse_csv(self._parse_value) 1873 1874 self._match_text_seq("DO") 1875 if self._match_text_seq("NOTHING"): 1876 nothing = True 1877 else: 1878 self._match(TokenType.UPDATE) 1879 self._match(TokenType.SET) 1880 expressions = self._parse_csv(self._parse_equality) 1881 1882 return self.expression( 1883 exp.OnConflict, 1884 duplicate=duplicate, 1885 expressions=expressions, 1886 nothing=nothing, 1887 key=key, 1888 constraint=constraint, 1889 ) 1890 1891 def _parse_returning(self) -> t.Optional[exp.Returning]: 1892 if not self._match(TokenType.RETURNING): 1893 return None 1894 return self.expression( 1895 exp.Returning, 1896 expressions=self._parse_csv(self._parse_expression), 1897 into=self._match(TokenType.INTO) and self._parse_table_part(), 1898 ) 1899 1900 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1901 if not self._match(TokenType.FORMAT): 1902 return None 1903 return self._parse_row_format() 1904 1905 def _parse_row_format( 1906 self, match_row: bool = False 1907 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1908 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1909 return None 1910 1911 if self._match_text_seq("SERDE"): 1912 this = self._parse_string() 1913 1914 serde_properties = None 1915 if self._match(TokenType.SERDE_PROPERTIES): 1916 serde_properties = self.expression( 1917 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1918 ) 1919 1920 return self.expression( 1921 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1922 ) 1923 1924 self._match_text_seq("DELIMITED") 1925 1926 kwargs = {} 1927 1928 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1929 kwargs["fields"] = self._parse_string() 1930 if self._match_text_seq("ESCAPED", "BY"): 1931 kwargs["escaped"] = self._parse_string() 1932 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1933 kwargs["collection_items"] = self._parse_string() 1934 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1935 kwargs["map_keys"] = self._parse_string() 1936 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1937 kwargs["lines"] = self._parse_string() 1938 if self._match_text_seq("NULL", "DEFINED", "AS"): 1939 kwargs["null"] = self._parse_string() 1940 1941 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1942 1943 def _parse_load(self) -> exp.LoadData | exp.Command: 1944 if self._match_text_seq("DATA"): 
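# (Editor's note) The branch below models Hive-style LOAD DATA; statements that
# do not continue with DATA fall through to the generic Command at the end of
# this method. Illustrative sketch, the path and table name are placeholders:
#
#   import sqlglot
#   sql = "LOAD DATA LOCAL INPATH '/tmp/x.csv' OVERWRITE INTO TABLE t"
#   sqlglot.parse_one(sql, read="hive")  # exp.LoadData(local=True, overwrite=True, ...)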
1945 local = self._match_text_seq("LOCAL") 1946 self._match_text_seq("INPATH") 1947 inpath = self._parse_string() 1948 overwrite = self._match(TokenType.OVERWRITE) 1949 self._match_pair(TokenType.INTO, TokenType.TABLE) 1950 1951 return self.expression( 1952 exp.LoadData, 1953 this=self._parse_table(schema=True), 1954 local=local, 1955 overwrite=overwrite, 1956 inpath=inpath, 1957 partition=self._parse_partition(), 1958 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1959 serde=self._match_text_seq("SERDE") and self._parse_string(), 1960 ) 1961 return self._parse_as_command(self._prev) 1962 1963 def _parse_delete(self) -> exp.Delete: 1964 # This handles MySQL's "Multiple-Table Syntax" 1965 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1966 tables = None 1967 comments = self._prev_comments 1968 if not self._match(TokenType.FROM, advance=False): 1969 tables = self._parse_csv(self._parse_table) or None 1970 1971 returning = self._parse_returning() 1972 1973 return self.expression( 1974 exp.Delete, 1975 comments=comments, 1976 tables=tables, 1977 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1978 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1979 where=self._parse_where(), 1980 returning=returning or self._parse_returning(), 1981 limit=self._parse_limit(), 1982 ) 1983 1984 def _parse_update(self) -> exp.Update: 1985 comments = self._prev_comments 1986 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 1987 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1988 returning = self._parse_returning() 1989 return self.expression( 1990 exp.Update, 1991 comments=comments, 1992 **{ # type: ignore 1993 "this": this, 1994 "expressions": expressions, 1995 "from": self._parse_from(joins=True), 1996 "where": self._parse_where(), 1997 "returning": returning or self._parse_returning(), 1998 "order": self._parse_order(), 1999 "limit": self._parse_limit(), 2000 }, 2001 ) 2002 2003 def _parse_uncache(self) -> exp.Uncache: 2004 if not self._match(TokenType.TABLE): 2005 self.raise_error("Expecting TABLE after UNCACHE") 2006 2007 return self.expression( 2008 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2009 ) 2010 2011 def _parse_cache(self) -> exp.Cache: 2012 lazy = self._match_text_seq("LAZY") 2013 self._match(TokenType.TABLE) 2014 table = self._parse_table(schema=True) 2015 2016 options = [] 2017 if self._match_text_seq("OPTIONS"): 2018 self._match_l_paren() 2019 k = self._parse_string() 2020 self._match(TokenType.EQ) 2021 v = self._parse_string() 2022 options = [k, v] 2023 self._match_r_paren() 2024 2025 self._match(TokenType.ALIAS) 2026 return self.expression( 2027 exp.Cache, 2028 this=table, 2029 lazy=lazy, 2030 options=options, 2031 expression=self._parse_select(nested=True), 2032 ) 2033 2034 def _parse_partition(self) -> t.Optional[exp.Partition]: 2035 if not self._match(TokenType.PARTITION): 2036 return None 2037 2038 return self.expression( 2039 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2040 ) 2041 2042 def _parse_value(self) -> exp.Tuple: 2043 if self._match(TokenType.L_PAREN): 2044 expressions = self._parse_csv(self._parse_conjunction) 2045 self._match_r_paren() 2046 return self.expression(exp.Tuple, expressions=expressions) 2047 2048 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
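# (Editor's illustration) sqlglot.parse_one("VALUES 1, 2", read="presto") thus
# yields exp.Values whose rows are the one-element tuples (1) and (2).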
2049 # https://prestodb.io/docs/current/sql/values.html 2050 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2051 2052 def _parse_projections(self) -> t.List[exp.Expression]: 2053 return self._parse_expressions() 2054 2055 def _parse_select( 2056 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2057 ) -> t.Optional[exp.Expression]: 2058 cte = self._parse_with() 2059 2060 if cte: 2061 this = self._parse_statement() 2062 2063 if not this: 2064 self.raise_error("Failed to parse any statement following CTE") 2065 return cte 2066 2067 if "with" in this.arg_types: 2068 this.set("with", cte) 2069 else: 2070 self.raise_error(f"{this.key} does not support CTE") 2071 this = cte 2072 2073 return this 2074 2075 # duckdb supports leading with FROM x 2076 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2077 2078 if self._match(TokenType.SELECT): 2079 comments = self._prev_comments 2080 2081 hint = self._parse_hint() 2082 all_ = self._match(TokenType.ALL) 2083 distinct = self._match_set(self.DISTINCT_TOKENS) 2084 2085 kind = ( 2086 self._match(TokenType.ALIAS) 2087 and self._match_texts(("STRUCT", "VALUE")) 2088 and self._prev.text 2089 ) 2090 2091 if distinct: 2092 distinct = self.expression( 2093 exp.Distinct, 2094 on=self._parse_value() if self._match(TokenType.ON) else None, 2095 ) 2096 2097 if all_ and distinct: 2098 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2099 2100 limit = self._parse_limit(top=True) 2101 projections = self._parse_projections() 2102 2103 this = self.expression( 2104 exp.Select, 2105 kind=kind, 2106 hint=hint, 2107 distinct=distinct, 2108 expressions=projections, 2109 limit=limit, 2110 ) 2111 this.comments = comments 2112 2113 into = self._parse_into() 2114 if into: 2115 this.set("into", into) 2116 2117 if not from_: 2118 from_ = self._parse_from() 2119 2120 if from_: 2121 this.set("from", from_) 2122 2123 this = self._parse_query_modifiers(this) 2124 elif (table or nested) and self._match(TokenType.L_PAREN): 2125 if self._match(TokenType.PIVOT): 2126 this = self._parse_simplified_pivot() 2127 elif self._match(TokenType.FROM): 2128 this = exp.select("*").from_( 2129 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2130 ) 2131 else: 2132 this = self._parse_table() if table else self._parse_select(nested=True) 2133 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2134 2135 self._match_r_paren() 2136 2137 # We return early here so that the UNION isn't attached to the subquery by the 2138 # following call to _parse_set_operations, but instead becomes the parent node 2139 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2140 elif self._match(TokenType.VALUES): 2141 this = self.expression( 2142 exp.Values, 2143 expressions=self._parse_csv(self._parse_value), 2144 alias=self._parse_table_alias(), 2145 ) 2146 elif from_: 2147 this = exp.select("*").from_(from_.this, copy=False) 2148 else: 2149 this = None 2150 2151 return self._parse_set_operations(this) 2152 2153 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2154 if not skip_with_token and not self._match(TokenType.WITH): 2155 return None 2156 2157 comments = self._prev_comments 2158 recursive = self._match(TokenType.RECURSIVE) 2159 2160 expressions = [] 2161 while True: 2162 expressions.append(self._parse_cte()) 2163 2164 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2165 break 2166 else: 2167 
self._match(TokenType.WITH) 2168 2169 return self.expression( 2170 exp.With, comments=comments, expressions=expressions, recursive=recursive 2171 ) 2172 2173 def _parse_cte(self) -> exp.CTE: 2174 alias = self._parse_table_alias() 2175 if not alias or not alias.this: 2176 self.raise_error("Expected CTE to have alias") 2177 2178 self._match(TokenType.ALIAS) 2179 return self.expression( 2180 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2181 ) 2182 2183 def _parse_table_alias( 2184 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2185 ) -> t.Optional[exp.TableAlias]: 2186 any_token = self._match(TokenType.ALIAS) 2187 alias = ( 2188 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2189 or self._parse_string_as_identifier() 2190 ) 2191 2192 index = self._index 2193 if self._match(TokenType.L_PAREN): 2194 columns = self._parse_csv(self._parse_function_parameter) 2195 self._match_r_paren() if columns else self._retreat(index) 2196 else: 2197 columns = None 2198 2199 if not alias and not columns: 2200 return None 2201 2202 return self.expression(exp.TableAlias, this=alias, columns=columns) 2203 2204 def _parse_subquery( 2205 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2206 ) -> t.Optional[exp.Subquery]: 2207 if not this: 2208 return None 2209 2210 return self.expression( 2211 exp.Subquery, 2212 this=this, 2213 pivots=self._parse_pivots(), 2214 alias=self._parse_table_alias() if parse_alias else None, 2215 ) 2216 2217 def _parse_query_modifiers( 2218 self, this: t.Optional[exp.Expression] 2219 ) -> t.Optional[exp.Expression]: 2220 if isinstance(this, self.MODIFIABLES): 2221 for join in iter(self._parse_join, None): 2222 this.append("joins", join) 2223 for lateral in iter(self._parse_lateral, None): 2224 this.append("laterals", lateral) 2225 2226 while True: 2227 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2228 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2229 key, expression = parser(self) 2230 2231 if expression: 2232 this.set(key, expression) 2233 if key == "limit": 2234 offset = expression.args.pop("offset", None) 2235 if offset: 2236 this.set("offset", exp.Offset(expression=offset)) 2237 continue 2238 break 2239 return this 2240 2241 def _parse_hint(self) -> t.Optional[exp.Hint]: 2242 if self._match(TokenType.HINT): 2243 hints = [] 2244 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2245 hints.extend(hint) 2246 2247 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2248 self.raise_error("Expected */ after HINT") 2249 2250 return self.expression(exp.Hint, expressions=hints) 2251 2252 return None 2253 2254 def _parse_into(self) -> t.Optional[exp.Into]: 2255 if not self._match(TokenType.INTO): 2256 return None 2257 2258 temp = self._match(TokenType.TEMPORARY) 2259 unlogged = self._match_text_seq("UNLOGGED") 2260 self._match(TokenType.TABLE) 2261 2262 return self.expression( 2263 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2264 ) 2265 2266 def _parse_from( 2267 self, joins: bool = False, skip_from_token: bool = False 2268 ) -> t.Optional[exp.From]: 2269 if not skip_from_token and not self._match(TokenType.FROM): 2270 return None 2271 2272 return self.expression( 2273 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2274 ) 2275 2276 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2277 if not self._match(TokenType.MATCH_RECOGNIZE): 2278 return None 2279 2280 
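# (Editor's note) The clause body is consumed positionally below: PARTITION BY,
# ORDER BY, MEASURES, a rows-per-match mode, AFTER MATCH SKIP, PATTERN and
# DEFINE. A minimal query shape, as a hedged illustration only:
#
#   SELECT * FROM t MATCH_RECOGNIZE (
#       PARTITION BY a ORDER BY b
#       MEASURES FIRST(b) AS fb
#       ONE ROW PER MATCH
#       AFTER MATCH SKIP PAST LAST ROW
#       PATTERN (x+) DEFINE x AS b > 0
#   ) AS mr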
self._match_l_paren() 2281 2282 partition = self._parse_partition_by() 2283 order = self._parse_order() 2284 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2285 2286 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2287 rows = exp.var("ONE ROW PER MATCH") 2288 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2289 text = "ALL ROWS PER MATCH" 2290 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2291 text += " SHOW EMPTY MATCHES" 2292 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2293 text += " OMIT EMPTY MATCHES" 2294 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2295 text += " WITH UNMATCHED ROWS" 2296 rows = exp.var(text) 2297 else: 2298 rows = None 2299 2300 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2301 text = "AFTER MATCH SKIP" 2302 if self._match_text_seq("PAST", "LAST", "ROW"): 2303 text += " PAST LAST ROW" 2304 elif self._match_text_seq("TO", "NEXT", "ROW"): 2305 text += " TO NEXT ROW" 2306 elif self._match_text_seq("TO", "FIRST"): 2307 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2308 elif self._match_text_seq("TO", "LAST"): 2309 text += f" TO LAST {self._advance_any().text}" # type: ignore 2310 after = exp.var(text) 2311 else: 2312 after = None 2313 2314 if self._match_text_seq("PATTERN"): 2315 self._match_l_paren() 2316 2317 if not self._curr: 2318 self.raise_error("Expecting )", self._curr) 2319 2320 paren = 1 2321 start = self._curr 2322 2323 while self._curr and paren > 0: 2324 if self._curr.token_type == TokenType.L_PAREN: 2325 paren += 1 2326 if self._curr.token_type == TokenType.R_PAREN: 2327 paren -= 1 2328 2329 end = self._prev 2330 self._advance() 2331 2332 if paren > 0: 2333 self.raise_error("Expecting )", self._curr) 2334 2335 pattern = exp.var(self._find_sql(start, end)) 2336 else: 2337 pattern = None 2338 2339 define = ( 2340 self._parse_csv( 2341 lambda: self.expression( 2342 exp.Alias, 2343 alias=self._parse_id_var(any_token=True), 2344 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2345 ) 2346 ) 2347 if self._match_text_seq("DEFINE") 2348 else None 2349 ) 2350 2351 self._match_r_paren() 2352 2353 return self.expression( 2354 exp.MatchRecognize, 2355 partition_by=partition, 2356 order=order, 2357 measures=measures, 2358 rows=rows, 2359 after=after, 2360 pattern=pattern, 2361 define=define, 2362 alias=self._parse_table_alias(), 2363 ) 2364 2365 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2366 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2367 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2368 2369 if outer_apply or cross_apply: 2370 this = self._parse_select(table=True) 2371 view = None 2372 outer = not cross_apply 2373 elif self._match(TokenType.LATERAL): 2374 this = self._parse_select(table=True) 2375 view = self._match(TokenType.VIEW) 2376 outer = self._match(TokenType.OUTER) 2377 else: 2378 return None 2379 2380 if not this: 2381 this = ( 2382 self._parse_unnest() 2383 or self._parse_function() 2384 or self._parse_id_var(any_token=False) 2385 ) 2386 2387 while self._match(TokenType.DOT): 2388 this = exp.Dot( 2389 this=this, 2390 expression=self._parse_function() or self._parse_id_var(any_token=False), 2391 ) 2392 2393 if view: 2394 table = self._parse_id_var(any_token=False) 2395 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2396 table_alias: t.Optional[exp.TableAlias] = self.expression( 2397 exp.TableAlias, this=table, columns=columns 2398 ) 2399 elif
isinstance(this, exp.Subquery) and this.alias: 2400 # Ensures parity between the Subquery's and the Lateral's "alias" args 2401 table_alias = this.args["alias"].copy() 2402 else: 2403 table_alias = self._parse_table_alias() 2404 2405 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2406 2407 def _parse_join_parts( 2408 self, 2409 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2410 return ( 2411 self._match_set(self.JOIN_METHODS) and self._prev, 2412 self._match_set(self.JOIN_SIDES) and self._prev, 2413 self._match_set(self.JOIN_KINDS) and self._prev, 2414 ) 2415 2416 def _parse_join( 2417 self, skip_join_token: bool = False, parse_bracket: bool = False 2418 ) -> t.Optional[exp.Join]: 2419 if self._match(TokenType.COMMA): 2420 return self.expression(exp.Join, this=self._parse_table()) 2421 2422 index = self._index 2423 method, side, kind = self._parse_join_parts() 2424 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2425 join = self._match(TokenType.JOIN) 2426 2427 if not skip_join_token and not join: 2428 self._retreat(index) 2429 kind = None 2430 method = None 2431 side = None 2432 2433 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2434 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2435 2436 if not skip_join_token and not join and not outer_apply and not cross_apply: 2437 return None 2438 2439 if outer_apply: 2440 side = Token(TokenType.LEFT, "LEFT") 2441 2442 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2443 2444 if method: 2445 kwargs["method"] = method.text 2446 if side: 2447 kwargs["side"] = side.text 2448 if kind: 2449 kwargs["kind"] = kind.text 2450 if hint: 2451 kwargs["hint"] = hint 2452 2453 if self._match(TokenType.ON): 2454 kwargs["on"] = self._parse_conjunction() 2455 elif self._match(TokenType.USING): 2456 kwargs["using"] = self._parse_wrapped_id_vars() 2457 elif not (kind and kind.token_type == TokenType.CROSS): 2458 index = self._index 2459 joins = self._parse_joins() 2460 2461 if joins and self._match(TokenType.ON): 2462 kwargs["on"] = self._parse_conjunction() 2463 elif joins and self._match(TokenType.USING): 2464 kwargs["using"] = self._parse_wrapped_id_vars() 2465 else: 2466 joins = None 2467 self._retreat(index) 2468 2469 kwargs["this"].set("joins", joins) 2470 2471 comments = [c for token in (method, side, kind) if token for c in token.comments] 2472 return self.expression(exp.Join, comments=comments, **kwargs) 2473 2474 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2475 this = self._parse_conjunction() 2476 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2477 return this 2478 2479 opclass = self._parse_var(any_token=True) 2480 if opclass: 2481 return self.expression(exp.Opclass, this=this, expression=opclass) 2482 2483 return this 2484 2485 def _parse_index( 2486 self, 2487 index: t.Optional[exp.Expression] = None, 2488 ) -> t.Optional[exp.Index]: 2489 if index: 2490 unique = None 2491 primary = None 2492 amp = None 2493 2494 self._match(TokenType.ON) 2495 self._match(TokenType.TABLE) # hive 2496 table = self._parse_table_parts(schema=True) 2497 else: 2498 unique = self._match(TokenType.UNIQUE) 2499 primary = self._match_text_seq("PRIMARY") 2500 amp = self._match_text_seq("AMP") 2501 2502 if not self._match(TokenType.INDEX): 2503 return None 2504 2505 index = self._parse_id_var() 2506 table = None 2507 2508 using = self._parse_var(any_token=True) if 
self._match(TokenType.USING) else None 2509 2510 if self._match(TokenType.L_PAREN, advance=False): 2511 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2512 else: 2513 columns = None 2514 2515 return self.expression( 2516 exp.Index, 2517 this=index, 2518 table=table, 2519 using=using, 2520 columns=columns, 2521 unique=unique, 2522 primary=primary, 2523 amp=amp, 2524 partition_by=self._parse_partition_by(), 2525 where=self._parse_where(), 2526 ) 2527 2528 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2529 hints: t.List[exp.Expression] = [] 2530 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2531 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2532 hints.append( 2533 self.expression( 2534 exp.WithTableHint, 2535 expressions=self._parse_csv( 2536 lambda: self._parse_function() or self._parse_var(any_token=True) 2537 ), 2538 ) 2539 ) 2540 self._match_r_paren() 2541 else: 2542 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2543 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2544 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2545 2546 self._match_texts({"INDEX", "KEY"}) 2547 if self._match(TokenType.FOR): 2548 hint.set("target", self._advance_any() and self._prev.text.upper()) 2549 2550 hint.set("expressions", self._parse_wrapped_id_vars()) 2551 hints.append(hint) 2552 2553 return hints or None 2554 2555 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2556 return ( 2557 (not schema and self._parse_function(optional_parens=False)) 2558 or self._parse_id_var(any_token=False) 2559 or self._parse_string_as_identifier() 2560 or self._parse_placeholder() 2561 ) 2562 2563 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2564 catalog = None 2565 db = None 2566 table = self._parse_table_part(schema=schema) 2567 2568 while self._match(TokenType.DOT): 2569 if catalog: 2570 # This allows nesting the table in arbitrarily many dot expressions if needed 2571 table = self.expression( 2572 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2573 ) 2574 else: 2575 catalog = db 2576 db = table 2577 table = self._parse_table_part(schema=schema) 2578 2579 if not table: 2580 self.raise_error(f"Expected table name but got {self._curr}") 2581 2582 return self.expression( 2583 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2584 ) 2585 2586 def _parse_table( 2587 self, 2588 schema: bool = False, 2589 joins: bool = False, 2590 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2591 parse_bracket: bool = False, 2592 ) -> t.Optional[exp.Expression]: 2593 lateral = self._parse_lateral() 2594 if lateral: 2595 return lateral 2596 2597 unnest = self._parse_unnest() 2598 if unnest: 2599 return unnest 2600 2601 values = self._parse_derived_table_values() 2602 if values: 2603 return values 2604 2605 subquery = self._parse_select(table=True) 2606 if subquery: 2607 if not subquery.args.get("pivots"): 2608 subquery.set("pivots", self._parse_pivots()) 2609 return subquery 2610 2611 bracket = parse_bracket and self._parse_bracket(None) 2612 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2613 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2614 2615 if schema: 2616 return self._parse_schema(this=this) 2617 2618 version = self._parse_version() 2619 2620 if version: 2621 this.set("version", version) 2622 2623 if self.ALIAS_POST_TABLESAMPLE: 
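# (Editor's note) ALIAS_POST_TABLESAMPLE is a dialect flag: when True, the
# sample clause precedes the alias (Hive-like "t TABLESAMPLE (...) a", per the
# editor's reading), so it has to be parsed here; otherwise it is parsed after
# the alias further down.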
2624 table_sample = self._parse_table_sample() 2625 2626 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2627 if alias: 2628 this.set("alias", alias) 2629 2630 this.set("hints", self._parse_table_hints()) 2631 2632 if not this.args.get("pivots"): 2633 this.set("pivots", self._parse_pivots()) 2634 2635 if not self.ALIAS_POST_TABLESAMPLE: 2636 table_sample = self._parse_table_sample() 2637 2638 if table_sample: 2639 table_sample.set("this", this) 2640 this = table_sample 2641 2642 if joins: 2643 for join in iter(self._parse_join, None): 2644 this.append("joins", join) 2645 2646 return this 2647 2648 def _parse_version(self) -> t.Optional[exp.Version]: 2649 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2650 this = "TIMESTAMP" 2651 elif self._match(TokenType.VERSION_SNAPSHOT): 2652 this = "VERSION" 2653 else: 2654 return None 2655 2656 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2657 kind = self._prev.text.upper() 2658 start = self._parse_bitwise() 2659 self._match_texts(("TO", "AND")) 2660 end = self._parse_bitwise() 2661 expression: t.Optional[exp.Expression] = self.expression( 2662 exp.Tuple, expressions=[start, end] 2663 ) 2664 elif self._match_text_seq("CONTAINED", "IN"): 2665 kind = "CONTAINED IN" 2666 expression = self.expression( 2667 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2668 ) 2669 elif self._match(TokenType.ALL): 2670 kind = "ALL" 2671 expression = None 2672 else: 2673 self._match_text_seq("AS", "OF") 2674 kind = "AS OF" 2675 expression = self._parse_type() 2676 2677 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2678 2679 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2680 if not self._match(TokenType.UNNEST): 2681 return None 2682 2683 expressions = self._parse_wrapped_csv(self._parse_type) 2684 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2685 2686 alias = self._parse_table_alias() if with_alias else None 2687 2688 if alias: 2689 if self.UNNEST_COLUMN_ONLY: 2690 if alias.args.get("columns"): 2691 self.raise_error("Unexpected extra column alias in unnest.") 2692 2693 alias.set("columns", [alias.this]) 2694 alias.set("this", None) 2695 2696 columns = alias.args.get("columns") or [] 2697 if offset and len(expressions) < len(columns): 2698 offset = columns.pop() 2699 2700 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2701 self._match(TokenType.ALIAS) 2702 offset = self._parse_id_var() or exp.to_identifier("offset") 2703 2704 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2705 2706 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2707 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2708 if not is_derived and not self._match(TokenType.VALUES): 2709 return None 2710 2711 expressions = self._parse_csv(self._parse_value) 2712 alias = self._parse_table_alias() 2713 2714 if is_derived: 2715 self._match_r_paren() 2716 2717 return self.expression( 2718 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2719 ) 2720 2721 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2722 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2723 as_modifier and self._match_text_seq("USING", "SAMPLE") 2724 ): 2725 return None 2726 2727 bucket_numerator = None 2728 bucket_denominator = None 2729 bucket_field = None 2730 percent = None 2731 rows = None 2732 size = None 2733 seed = None 
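# (Editor's note) A map of the argument forms handled below, with illustrative
# numbers: "(5 PERCENT)" fills percent, "(10 ROWS)" fills rows, a bare "(100)"
# fills size, and "(BUCKET 3 OUT OF 16 ON x)" fills the bucket_* slots:
#
#   import sqlglot
#   from sqlglot import exp
#   node = sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (5 PERCENT)")
#   node.find(exp.TableSample).args["percent"]  # the number literal 5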
2734 2735 kind = ( 2736 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2737 ) 2738 method = self._parse_var(tokens=(TokenType.ROW,)) 2739 2740 matched_l_paren = self._match(TokenType.L_PAREN) 2741 2742 if self.TABLESAMPLE_CSV: 2743 num = None 2744 expressions = self._parse_csv(self._parse_primary) 2745 else: 2746 expressions = None 2747 num = ( 2748 self._parse_factor() 2749 if self._match(TokenType.NUMBER, advance=False) 2750 else self._parse_primary() 2751 ) 2752 2753 if self._match_text_seq("BUCKET"): 2754 bucket_numerator = self._parse_number() 2755 self._match_text_seq("OUT", "OF") 2756 bucket_denominator = self._parse_number() 2757 self._match(TokenType.ON) 2758 bucket_field = self._parse_field() 2759 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2760 percent = num 2761 elif self._match(TokenType.ROWS): 2762 rows = num 2763 elif num: 2764 size = num 2765 2766 if matched_l_paren: 2767 self._match_r_paren() 2768 2769 if self._match(TokenType.L_PAREN): 2770 method = self._parse_var() 2771 seed = self._match(TokenType.COMMA) and self._parse_number() 2772 self._match_r_paren() 2773 elif self._match_texts(("SEED", "REPEATABLE")): 2774 seed = self._parse_wrapped(self._parse_number) 2775 2776 return self.expression( 2777 exp.TableSample, 2778 expressions=expressions, 2779 method=method, 2780 bucket_numerator=bucket_numerator, 2781 bucket_denominator=bucket_denominator, 2782 bucket_field=bucket_field, 2783 percent=percent, 2784 rows=rows, 2785 size=size, 2786 seed=seed, 2787 kind=kind, 2788 ) 2789 2790 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2791 return list(iter(self._parse_pivot, None)) or None 2792 2793 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2794 return list(iter(self._parse_join, None)) or None 2795 2796 # https://duckdb.org/docs/sql/statements/pivot 2797 def _parse_simplified_pivot(self) -> exp.Pivot: 2798 def _parse_on() -> t.Optional[exp.Expression]: 2799 this = self._parse_bitwise() 2800 return self._parse_in(this) if self._match(TokenType.IN) else this 2801 2802 this = self._parse_table() 2803 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2804 using = self._match(TokenType.USING) and self._parse_csv( 2805 lambda: self._parse_alias(self._parse_function()) 2806 ) 2807 group = self._parse_group() 2808 return self.expression( 2809 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2810 ) 2811 2812 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2813 index = self._index 2814 include_nulls = None 2815 2816 if self._match(TokenType.PIVOT): 2817 unpivot = False 2818 elif self._match(TokenType.UNPIVOT): 2819 unpivot = True 2820 2821 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2822 if self._match_text_seq("INCLUDE", "NULLS"): 2823 include_nulls = True 2824 elif self._match_text_seq("EXCLUDE", "NULLS"): 2825 include_nulls = False 2826 else: 2827 return None 2828 2829 expressions = [] 2830 field = None 2831 2832 if not self._match(TokenType.L_PAREN): 2833 self._retreat(index) 2834 return None 2835 2836 if unpivot: 2837 expressions = self._parse_csv(self._parse_column) 2838 else: 2839 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2840 2841 if not expressions: 2842 self.raise_error("Failed to parse PIVOT's aggregation list") 2843 2844 if not self._match(TokenType.FOR): 2845 self.raise_error("Expecting FOR") 2846 2847 value = self._parse_column() 2848 2849
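# (Editor's note) At this point the aggregation list and the FOR column have
# been consumed; the IN list parsed next supplies the pivoted values, each of
# which becomes an output column. Illustrative fragment:
# PIVOT (SUM(v) FOR k IN ('a', 'b'))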
if not self._match(TokenType.IN): 2850 self.raise_error("Expecting IN") 2851 2852 field = self._parse_in(value, alias=True) 2853 2854 self._match_r_paren() 2855 2856 pivot = self.expression( 2857 exp.Pivot, 2858 expressions=expressions, 2859 field=field, 2860 unpivot=unpivot, 2861 include_nulls=include_nulls, 2862 ) 2863 2864 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2865 pivot.set("alias", self._parse_table_alias()) 2866 2867 if not unpivot: 2868 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2869 2870 columns: t.List[exp.Expression] = [] 2871 for fld in pivot.args["field"].expressions: 2872 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2873 for name in names: 2874 if self.PREFIXED_PIVOT_COLUMNS: 2875 name = f"{name}_{field_name}" if name else field_name 2876 else: 2877 name = f"{field_name}_{name}" if name else field_name 2878 2879 columns.append(exp.to_identifier(name)) 2880 2881 pivot.set("columns", columns) 2882 2883 return pivot 2884 2885 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2886 return [agg.alias for agg in aggregations] 2887 2888 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2889 if not skip_where_token and not self._match(TokenType.WHERE): 2890 return None 2891 2892 return self.expression( 2893 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2894 ) 2895 2896 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2897 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2898 return None 2899 2900 elements = defaultdict(list) 2901 2902 if self._match(TokenType.ALL): 2903 return self.expression(exp.Group, all=True) 2904 2905 while True: 2906 expressions = self._parse_csv(self._parse_conjunction) 2907 if expressions: 2908 elements["expressions"].extend(expressions) 2909 2910 grouping_sets = self._parse_grouping_sets() 2911 if grouping_sets: 2912 elements["grouping_sets"].extend(grouping_sets) 2913 2914 rollup = None 2915 cube = None 2916 totals = None 2917 2918 with_ = self._match(TokenType.WITH) 2919 if self._match(TokenType.ROLLUP): 2920 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2921 elements["rollup"].extend(ensure_list(rollup)) 2922 2923 if self._match(TokenType.CUBE): 2924 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2925 elements["cube"].extend(ensure_list(cube)) 2926 2927 if self._match_text_seq("TOTALS"): 2928 totals = True 2929 elements["totals"] = True # type: ignore 2930 2931 if not (grouping_sets or rollup or cube or totals): 2932 break 2933 2934 return self.expression(exp.Group, **elements) # type: ignore 2935 2936 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2937 if not self._match(TokenType.GROUPING_SETS): 2938 return None 2939 2940 return self._parse_wrapped_csv(self._parse_grouping_set) 2941 2942 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2943 if self._match(TokenType.L_PAREN): 2944 grouping_set = self._parse_csv(self._parse_column) 2945 self._match_r_paren() 2946 return self.expression(exp.Tuple, expressions=grouping_set) 2947 2948 return self._parse_column() 2949 2950 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2951 if not skip_having_token and not self._match(TokenType.HAVING): 2952 return None 2953 return self.expression(exp.Having, this=self._parse_conjunction()) 2954 2955 def _parse_qualify(self) -> 
t.Optional[exp.Qualify]: 2956 if not self._match(TokenType.QUALIFY): 2957 return None 2958 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2959 2960 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2961 if skip_start_token: 2962 start = None 2963 elif self._match(TokenType.START_WITH): 2964 start = self._parse_conjunction() 2965 else: 2966 return None 2967 2968 self._match(TokenType.CONNECT_BY) 2969 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2970 exp.Prior, this=self._parse_bitwise() 2971 ) 2972 connect = self._parse_conjunction() 2973 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2974 2975 if not start and self._match(TokenType.START_WITH): 2976 start = self._parse_conjunction() 2977 2978 return self.expression(exp.Connect, start=start, connect=connect) 2979 2980 def _parse_order( 2981 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2982 ) -> t.Optional[exp.Expression]: 2983 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2984 return this 2985 2986 return self.expression( 2987 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2988 ) 2989 2990 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2991 if not self._match(token): 2992 return None 2993 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2994 2995 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 2996 this = parse_method() if parse_method else self._parse_conjunction() 2997 2998 asc = self._match(TokenType.ASC) 2999 desc = self._match(TokenType.DESC) or (asc and False) 3000 3001 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3002 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3003 3004 nulls_first = is_nulls_first or False 3005 explicitly_null_ordered = is_nulls_first or is_nulls_last 3006 3007 if ( 3008 not explicitly_null_ordered 3009 and ( 3010 (not desc and self.NULL_ORDERING == "nulls_are_small") 3011 or (desc and self.NULL_ORDERING != "nulls_are_small") 3012 ) 3013 and self.NULL_ORDERING != "nulls_are_last" 3014 ): 3015 nulls_first = True 3016 3017 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3018 3019 def _parse_limit( 3020 self, this: t.Optional[exp.Expression] = None, top: bool = False 3021 ) -> t.Optional[exp.Expression]: 3022 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3023 comments = self._prev_comments 3024 if top: 3025 limit_paren = self._match(TokenType.L_PAREN) 3026 expression = self._parse_number() 3027 3028 if limit_paren: 3029 self._match_r_paren() 3030 else: 3031 expression = self._parse_term() 3032 3033 if self._match(TokenType.COMMA): 3034 offset = expression 3035 expression = self._parse_term() 3036 else: 3037 offset = None 3038 3039 limit_exp = self.expression( 3040 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3041 ) 3042 3043 return limit_exp 3044 3045 if self._match(TokenType.FETCH): 3046 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3047 direction = self._prev.text if direction else "FIRST" 3048 3049 count = self._parse_field(tokens=self.FETCH_TOKENS) 3050 percent = self._match(TokenType.PERCENT) 3051 3052 self._match_set((TokenType.ROW, TokenType.ROWS)) 3053 3054 only = self._match_text_seq("ONLY") 3055 with_ties = self._match_text_seq("WITH", "TIES") 3056 3057 if only and with_ties: 3058 self.raise_error("Cannot specify both ONLY and WITH 
TIES in FETCH clause") 3059 3060 return self.expression( 3061 exp.Fetch, 3062 direction=direction, 3063 count=count, 3064 percent=percent, 3065 with_ties=with_ties, 3066 ) 3067 3068 return this 3069 3070 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3071 if not self._match(TokenType.OFFSET): 3072 return this 3073 3074 count = self._parse_term() 3075 self._match_set((TokenType.ROW, TokenType.ROWS)) 3076 return self.expression(exp.Offset, this=this, expression=count) 3077 3078 def _parse_locks(self) -> t.List[exp.Lock]: 3079 locks = [] 3080 while True: 3081 if self._match_text_seq("FOR", "UPDATE"): 3082 update = True 3083 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3084 "LOCK", "IN", "SHARE", "MODE" 3085 ): 3086 update = False 3087 else: 3088 break 3089 3090 expressions = None 3091 if self._match_text_seq("OF"): 3092 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3093 3094 wait: t.Optional[bool | exp.Expression] = None 3095 if self._match_text_seq("NOWAIT"): 3096 wait = True 3097 elif self._match_text_seq("WAIT"): 3098 wait = self._parse_primary() 3099 elif self._match_text_seq("SKIP", "LOCKED"): 3100 wait = False 3101 3102 locks.append( 3103 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3104 ) 3105 3106 return locks 3107 3108 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3109 if not self._match_set(self.SET_OPERATIONS): 3110 return this 3111 3112 token_type = self._prev.token_type 3113 3114 if token_type == TokenType.UNION: 3115 expression = exp.Union 3116 elif token_type == TokenType.EXCEPT: 3117 expression = exp.Except 3118 else: 3119 expression = exp.Intersect 3120 3121 return self.expression( 3122 expression, 3123 this=this, 3124 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3125 by_name=self._match_text_seq("BY", "NAME"), 3126 expression=self._parse_set_operations(self._parse_select(nested=True)), 3127 ) 3128 3129 def _parse_expression(self) -> t.Optional[exp.Expression]: 3130 return self._parse_alias(self._parse_conjunction()) 3131 3132 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3133 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3134 3135 def _parse_equality(self) -> t.Optional[exp.Expression]: 3136 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3137 3138 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3139 return self._parse_tokens(self._parse_range, self.COMPARISON) 3140 3141 def _parse_range(self) -> t.Optional[exp.Expression]: 3142 this = self._parse_bitwise() 3143 negate = self._match(TokenType.NOT) 3144 3145 if self._match_set(self.RANGE_PARSERS): 3146 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3147 if not expression: 3148 return this 3149 3150 this = expression 3151 elif self._match(TokenType.ISNULL): 3152 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3153 3154 # Postgres supports ISNULL and NOTNULL for conditions. 
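# (Editor's illustration) "x ISNULL" parses as exp.Is(x, NULL), i.e. "x IS NULL",
# and "x NOTNULL", handled just below, as its negation, "NOT x IS NULL".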
3155 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3156 if self._match(TokenType.NOTNULL): 3157 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3158 this = self.expression(exp.Not, this=this) 3159 3160 if negate: 3161 this = self.expression(exp.Not, this=this) 3162 3163 if self._match(TokenType.IS): 3164 this = self._parse_is(this) 3165 3166 return this 3167 3168 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3169 index = self._index - 1 3170 negate = self._match(TokenType.NOT) 3171 3172 if self._match_text_seq("DISTINCT", "FROM"): 3173 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3174 return self.expression(klass, this=this, expression=self._parse_expression()) 3175 3176 expression = self._parse_null() or self._parse_boolean() 3177 if not expression: 3178 self._retreat(index) 3179 return None 3180 3181 this = self.expression(exp.Is, this=this, expression=expression) 3182 return self.expression(exp.Not, this=this) if negate else this 3183 3184 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3185 unnest = self._parse_unnest(with_alias=False) 3186 if unnest: 3187 this = self.expression(exp.In, this=this, unnest=unnest) 3188 elif self._match(TokenType.L_PAREN): 3189 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3190 3191 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3192 this = self.expression(exp.In, this=this, query=expressions[0]) 3193 else: 3194 this = self.expression(exp.In, this=this, expressions=expressions) 3195 3196 self._match_r_paren(this) 3197 else: 3198 this = self.expression(exp.In, this=this, field=self._parse_field()) 3199 3200 return this 3201 3202 def _parse_between(self, this: exp.Expression) -> exp.Between: 3203 low = self._parse_bitwise() 3204 self._match(TokenType.AND) 3205 high = self._parse_bitwise() 3206 return self.expression(exp.Between, this=this, low=low, high=high) 3207 3208 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3209 if not self._match(TokenType.ESCAPE): 3210 return this 3211 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3212 3213 def _parse_interval(self) -> t.Optional[exp.Interval]: 3214 index = self._index 3215 3216 if not self._match(TokenType.INTERVAL): 3217 return None 3218 3219 if self._match(TokenType.STRING, advance=False): 3220 this = self._parse_primary() 3221 else: 3222 this = self._parse_term() 3223 3224 if not this: 3225 self._retreat(index) 3226 return None 3227 3228 unit = self._parse_function() or self._parse_var(any_token=True) 3229 3230 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3231 # each INTERVAL expression into this canonical form so it's easy to transpile 3232 if this and this.is_number: 3233 this = exp.Literal.string(this.name) 3234 elif this and this.is_string: 3235 parts = this.name.split() 3236 3237 if len(parts) == 2: 3238 if unit: 3239 # This is not actually a unit, it's something else (e.g. 
a "window side") 3240 unit = None 3241 self._retreat(self._index - 1) 3242 3243 this = exp.Literal.string(parts[0]) 3244 unit = self.expression(exp.Var, this=parts[1]) 3245 3246 return self.expression(exp.Interval, this=this, unit=unit) 3247 3248 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3249 this = self._parse_term() 3250 3251 while True: 3252 if self._match_set(self.BITWISE): 3253 this = self.expression( 3254 self.BITWISE[self._prev.token_type], 3255 this=this, 3256 expression=self._parse_term(), 3257 ) 3258 elif self._match(TokenType.DQMARK): 3259 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3260 elif self._match_pair(TokenType.LT, TokenType.LT): 3261 this = self.expression( 3262 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3263 ) 3264 elif self._match_pair(TokenType.GT, TokenType.GT): 3265 this = self.expression( 3266 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3267 ) 3268 else: 3269 break 3270 3271 return this 3272 3273 def _parse_term(self) -> t.Optional[exp.Expression]: 3274 return self._parse_tokens(self._parse_factor, self.TERM) 3275 3276 def _parse_factor(self) -> t.Optional[exp.Expression]: 3277 return self._parse_tokens(self._parse_unary, self.FACTOR) 3278 3279 def _parse_unary(self) -> t.Optional[exp.Expression]: 3280 if self._match_set(self.UNARY_PARSERS): 3281 return self.UNARY_PARSERS[self._prev.token_type](self) 3282 return self._parse_at_time_zone(self._parse_type()) 3283 3284 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3285 interval = parse_interval and self._parse_interval() 3286 if interval: 3287 return interval 3288 3289 index = self._index 3290 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3291 this = self._parse_column() 3292 3293 if data_type: 3294 if isinstance(this, exp.Literal): 3295 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3296 if parser: 3297 return parser(self, this, data_type) 3298 return self.expression(exp.Cast, this=this, to=data_type) 3299 if not data_type.expressions: 3300 self._retreat(index) 3301 return self._parse_column() 3302 return self._parse_column_ops(data_type) 3303 3304 return this and self._parse_column_ops(this) 3305 3306 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3307 this = self._parse_type() 3308 if not this: 3309 return None 3310 3311 return self.expression( 3312 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3313 ) 3314 3315 def _parse_types( 3316 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3317 ) -> t.Optional[exp.Expression]: 3318 index = self._index 3319 3320 prefix = self._match_text_seq("SYSUDTLIB", ".") 3321 3322 if not self._match_set(self.TYPE_TOKENS): 3323 identifier = allow_identifiers and self._parse_id_var( 3324 any_token=False, tokens=(TokenType.VAR,) 3325 ) 3326 3327 if identifier: 3328 tokens = self._tokenizer.tokenize(identifier.name) 3329 3330 if len(tokens) != 1: 3331 self.raise_error("Unexpected identifier", self._prev) 3332 3333 if tokens[0].token_type in self.TYPE_TOKENS: 3334 self._prev = tokens[0] 3335 elif self.SUPPORTS_USER_DEFINED_TYPES: 3336 type_name = identifier.name 3337 3338 while self._match(TokenType.DOT): 3339 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3340 3341 return exp.DataType.build(type_name, udt=True) 3342 else: 3343 return None 3344 else: 3345 return None 3346 3347 type_token = self._prev.token_type 3348 3349 if type_token 
== TokenType.PSEUDO_TYPE: 3350 return self.expression(exp.PseudoType, this=self._prev.text) 3351 3352 if type_token == TokenType.OBJECT_IDENTIFIER: 3353 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3354 3355 nested = type_token in self.NESTED_TYPE_TOKENS 3356 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3357 expressions = None 3358 maybe_func = False 3359 3360 if self._match(TokenType.L_PAREN): 3361 if is_struct: 3362 expressions = self._parse_csv(self._parse_struct_types) 3363 elif nested: 3364 expressions = self._parse_csv( 3365 lambda: self._parse_types( 3366 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3367 ) 3368 ) 3369 elif type_token in self.ENUM_TYPE_TOKENS: 3370 expressions = self._parse_csv(self._parse_equality) 3371 else: 3372 expressions = self._parse_csv(self._parse_type_size) 3373 3374 if not expressions or not self._match(TokenType.R_PAREN): 3375 self._retreat(index) 3376 return None 3377 3378 maybe_func = True 3379 3380 this: t.Optional[exp.Expression] = None 3381 values: t.Optional[t.List[exp.Expression]] = None 3382 3383 if nested and self._match(TokenType.LT): 3384 if is_struct: 3385 expressions = self._parse_csv(self._parse_struct_types) 3386 else: 3387 expressions = self._parse_csv( 3388 lambda: self._parse_types( 3389 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3390 ) 3391 ) 3392 3393 if not self._match(TokenType.GT): 3394 self.raise_error("Expecting >") 3395 3396 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3397 values = self._parse_csv(self._parse_conjunction) 3398 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3399 3400 if type_token in self.TIMESTAMPS: 3401 if self._match_text_seq("WITH", "TIME", "ZONE"): 3402 maybe_func = False 3403 tz_type = ( 3404 exp.DataType.Type.TIMETZ 3405 if type_token in self.TIMES 3406 else exp.DataType.Type.TIMESTAMPTZ 3407 ) 3408 this = exp.DataType(this=tz_type, expressions=expressions) 3409 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3410 maybe_func = False 3411 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3412 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3413 maybe_func = False 3414 elif type_token == TokenType.INTERVAL: 3415 unit = self._parse_var() 3416 3417 if self._match_text_seq("TO"): 3418 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3419 else: 3420 span = None 3421 3422 if span or not unit: 3423 this = self.expression( 3424 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3425 ) 3426 else: 3427 this = self.expression(exp.Interval, unit=unit) 3428 3429 if maybe_func and check_func: 3430 index2 = self._index 3431 peek = self._parse_string() 3432 3433 if not peek: 3434 self._retreat(index) 3435 return None 3436 3437 self._retreat(index2) 3438 3439 if not this: 3440 if self._match_text_seq("UNSIGNED"): 3441 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3442 if not unsigned_type_token: 3443 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3444 3445 type_token = unsigned_type_token or type_token 3446 3447 this = exp.DataType( 3448 this=exp.DataType.Type[type_token.value], 3449 expressions=expressions, 3450 nested=nested, 3451 values=values, 3452 prefix=prefix, 3453 ) 3454 3455 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3456 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3457 3458 return this 3459 3460 def 
_parse_struct_types(self) -> t.Optional[exp.Expression]: 3461 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3462 self._match(TokenType.COLON) 3463 return self._parse_column_def(this) 3464 3465 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3466 if not self._match_text_seq("AT", "TIME", "ZONE"): 3467 return this 3468 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3469 3470 def _parse_column(self) -> t.Optional[exp.Expression]: 3471 this = self._parse_field() 3472 if isinstance(this, exp.Identifier): 3473 this = self.expression(exp.Column, this=this) 3474 elif not this: 3475 return self._parse_bracket(this) 3476 return self._parse_column_ops(this) 3477 3478 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3479 this = self._parse_bracket(this) 3480 3481 while self._match_set(self.COLUMN_OPERATORS): 3482 op_token = self._prev.token_type 3483 op = self.COLUMN_OPERATORS.get(op_token) 3484 3485 if op_token == TokenType.DCOLON: 3486 field = self._parse_types() 3487 if not field: 3488 self.raise_error("Expected type") 3489 elif op and self._curr: 3490 self._advance() 3491 value = self._prev.text 3492 field = ( 3493 exp.Literal.number(value) 3494 if self._prev.token_type == TokenType.NUMBER 3495 else exp.Literal.string(value) 3496 ) 3497 else: 3498 field = self._parse_field(anonymous_func=True, any_token=True) 3499 3500 if isinstance(field, exp.Func): 3501 # bigquery allows function calls like x.y.count(...) 3502 # SAFE.SUBSTR(...) 3503 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3504 this = self._replace_columns_with_dots(this) 3505 3506 if op: 3507 this = op(self, this, field) 3508 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3509 this = self.expression( 3510 exp.Column, 3511 this=field, 3512 table=this.this, 3513 db=this.args.get("table"), 3514 catalog=this.args.get("db"), 3515 ) 3516 else: 3517 this = self.expression(exp.Dot, this=this, expression=field) 3518 this = self._parse_bracket(this) 3519 return this 3520 3521 def _parse_primary(self) -> t.Optional[exp.Expression]: 3522 if self._match_set(self.PRIMARY_PARSERS): 3523 token_type = self._prev.token_type 3524 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3525 3526 if token_type == TokenType.STRING: 3527 expressions = [primary] 3528 while self._match(TokenType.STRING): 3529 expressions.append(exp.Literal.string(self._prev.text)) 3530 3531 if len(expressions) > 1: 3532 return self.expression(exp.Concat, expressions=expressions) 3533 3534 return primary 3535 3536 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3537 return exp.Literal.number(f"0.{self._prev.text}") 3538 3539 if self._match(TokenType.L_PAREN): 3540 comments = self._prev_comments 3541 query = self._parse_select() 3542 3543 if query: 3544 expressions = [query] 3545 else: 3546 expressions = self._parse_expressions() 3547 3548 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3549 3550 if isinstance(this, exp.Subqueryable): 3551 this = self._parse_set_operations( 3552 self._parse_subquery(this=this, parse_alias=False) 3553 ) 3554 elif len(expressions) > 1: 3555 this = self.expression(exp.Tuple, expressions=expressions) 3556 else: 3557 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3558 3559 if this: 3560 this.add_comments(comments) 3561 3562 self._match_r_paren(expression=this) 3563 return this 3564 3565 

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
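
    # Illustrative sketch (not part of the original source): _parse_function above
    # resolves parenthesis-less functions via NO_PAREN_FUNCTIONS and falls back to
    # exp.Anonymous for unregistered names. Assuming the default dialect:
    #
    #   SELECT CURRENT_DATE   -> projection is exp.CurrentDate()
    #   SELECT MY_UDF(1)      -> exp.Anonymous(this="MY_UDF", expressions=[1]),
    #                            MY_UDF being a hypothetical name not in FUNCTIONS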

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()
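
    # Illustrative sketch (not part of the original source): _parse_auto_increment
    # above covers both the wrapped and the START/INCREMENT spellings, so a
    # T-SQL-style IDENTITY(1, 10) yields
    # exp.GeneratedAsIdentityColumnConstraint(start=1, increment=10), while a bare
    # AUTO_INCREMENT yields exp.AutoIncrementColumnConstraint().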

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)
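
    # Illustrative sketch (not part of the original source): a Postgres-style column
    #
    #   id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2)
    #
    # runs through _parse_generated_as_identity above and yields a
    # GeneratedAsIdentityColumnConstraint with this=False (BY DEFAULT), start=1
    # and increment=2.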
    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
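
    # Illustrative sketch (not part of the original source): for a constraint like
    #
    #   FOREIGN KEY (order_id) REFERENCES orders (id) ON DELETE CASCADE
    #
    # _parse_foreign_key above collects the wrapped column list, delegates the
    # REFERENCES clause to _parse_references, and ends up passing delete="CASCADE"
    # as a keyword argument of the resulting exp.ForeignKey.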
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )
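
    # Illustrative sketch (not part of the original source): _parse_bracket above
    # handles subscripts, array literals and DuckDB-style struct literals. Assuming
    # INDEX_OFFSET = 0 and a dialect that tokenizes braces:
    #
    #   x[1]       -> exp.Bracket(this=x, expressions=[1])
    #   {'a': 1}   -> exp.Struct(expressions=[exp.Slice(this='a', expression=1)])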
    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)
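
    # Illustrative sketch (not part of the original source): when a FORMAT clause
    # follows a temporal type, _parse_cast above rewrites the cast into a parsing
    # function, e.g. (Teradata-style; the exact mapping is dialect-specific):
    #
    #   CAST(x AS DATE FORMAT 'YYYY-MM-DD')
    #   -> exp.StrToDate(this=x, format='%Y-%m-%d')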
    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so
        # when we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
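
    # Illustrative sketch (not part of the original source): _parse_string_agg above
    # parses both of these forms into the same exp.GroupConcat node,
    #
    #   STRING_AGG(x, ',' ORDER BY y)                   -- Postgres/BigQuery form
    #   STRING_AGG(x, ',') WITHIN GROUP (ORDER BY y)    -- Oracle/T-SQL form
    #
    # which is what makes transpilation to MySQL's GROUP_CONCAT straightforward.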
4222 """ 4223 args = self._parse_csv(self._parse_conjunction) 4224 4225 if len(args) < 3: 4226 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4227 4228 expression, *expressions = args 4229 if not expression: 4230 return None 4231 4232 ifs = [] 4233 for search, result in zip(expressions[::2], expressions[1::2]): 4234 if not search or not result: 4235 return None 4236 4237 if isinstance(search, exp.Literal): 4238 ifs.append( 4239 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4240 ) 4241 elif isinstance(search, exp.Null): 4242 ifs.append( 4243 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4244 ) 4245 else: 4246 cond = exp.or_( 4247 exp.EQ(this=expression.copy(), expression=search), 4248 exp.and_( 4249 exp.Is(this=expression.copy(), expression=exp.Null()), 4250 exp.Is(this=search.copy(), expression=exp.Null()), 4251 copy=False, 4252 ), 4253 copy=False, 4254 ) 4255 ifs.append(exp.If(this=cond, true=result)) 4256 4257 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4258 4259 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4260 self._match_text_seq("KEY") 4261 key = self._parse_column() 4262 self._match_set((TokenType.COLON, TokenType.COMMA)) 4263 self._match_text_seq("VALUE") 4264 value = self._parse_bitwise() 4265 4266 if not key and not value: 4267 return None 4268 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4269 4270 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4271 if not this or not self._match_text_seq("FORMAT", "JSON"): 4272 return this 4273 4274 return self.expression(exp.FormatJson, this=this) 4275 4276 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4277 # Parses the "X ON Y" syntax, i.e. 
    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
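
    # Illustrative sketch (not part of the original source): _parse_logarithm above
    # normalizes LOG's argument order, so LOG(2, 8) in a base-first dialect and
    # LOG(8, 2) in an expression-first dialect both become
    # exp.Log(this=2, expression=8), while a single-argument LOG(x) becomes
    # exp.Ln(this=x) when LOG_DEFAULTS_TO_LN is set.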
    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this
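
    # Illustrative sketch (not part of the original source): _parse_trim above
    # accepts both the ANSI and the comma-separated form, e.g.
    #
    #   TRIM(BOTH 'x' FROM y)  -> exp.Trim(this=y, expression='x', position='BOTH')
    #   TRIM(y)                -> exp.Trim(this=y)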
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER;
        # some dialects implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html
        #
        # The code above in _parse_lambda handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...
        # while the call below handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...
        #
        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity:
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
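
    # Illustrative sketch (not part of the original source): given
    #
    #   FIRST_VALUE(x) IGNORE NULLS OVER (
    #       PARTITION BY g ORDER BY y ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    #   )
    #
    # _parse_window above first wraps FIRST_VALUE(x) in exp.IgnoreNulls, then builds
    # an exp.Window whose spec is an exp.WindowSpec with kind='ROWS',
    # start='UNBOUNDED', start_side='PRECEDING' and end='CURRENT ROW'.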
    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()
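
    # Illustrative sketch (not part of the original source): the literal helpers
    # above all fall back to _parse_placeholder (defined below), which is what lets
    # a bound parameter such as ? or :n stand in wherever a string, number,
    # identifier or NULL literal is expected.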
    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result
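
    # Illustrative sketch (not part of the original source): _parse_tokens above is
    # the generic left-associative binary combinator. Parsing "a - b + c" with a
    # term-level parse_method and {MINUS: exp.Sub, PLUS: exp.Add} produces
    # exp.Add(this=exp.Sub(this=a, expression=b), expression=c).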
    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )
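
    # Illustrative sketch (not part of the original source): for
    #
    #   ALTER TABLE t ADD COLUMN IF NOT EXISTS c INT AFTER b
    #
    # _parse_add_column above returns the column def for c with exists set and an
    # exp.ColumnPosition(position='AFTER') attached (Databricks/MySQL style).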
    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)
    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])
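
    # Illustrative sketch (not part of the original source): _parse_as_command above
    # is the catch-all used by _parse_show, _parse_set and _parse_alter. It consumes
    # the remaining tokens verbatim, so an unsupported statement round-trips as an
    # opaque exp.Command(this=<first keyword>, expression=<rest of the SQL>) instead
    # of raising.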
    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False
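
    # Illustrative sketch (not part of the original source): _find_parser above walks
    # a trie such as SHOW_TRIE or SET_TRIE (built by the _Parser metaclass from the
    # keys of SHOW_PARSERS / SET_PARSERS), advancing one token at a time until
    # in_trie reports TrieResult.EXISTS for a (possibly multi-word) key, then
    # dispatches to the registered subparser; on TrieResult.FAILED it retreats.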
    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
        return [
            exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
            for value in values
            if value
        ]
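
    # Illustrative sketch (not part of the original source): _ensure_string_if_null
    # above rewrites each argument x into COALESCE(CAST(x AS TEXT), ''), which is how
    # the CONCAT parsers make the "NULL behaves like an empty string" semantics of
    # CONCAT_NULL_OUTPUTS_STRING dialects explicit in the tree.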
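
The _match family implements the parser's one-token lookahead: each helper tests the current token and advances the cursor only on success, while _match_text_seq snapshots the index so a partially matched sequence can be rolled back. A small demonstration that pokes at private state purely for illustration:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer, TokenType

parser = Parser()
parser._tokens = Tokenizer().tokenize("SELECT * FROM tbl")
parser._index = -1
parser._advance()  # position the cursor on the first token, as _parse does

print(parser._match(TokenType.SELECT))  # True: token consumed, cursor advances
print(parser._match(TokenType.FROM))    # None: current token is "*", cursor stays put
print(parser._match_set({TokenType.STAR, TokenType.VAR}))  # True: "*" consumed

# "FROM" matches but "NOPE" does not, so the whole sequence is rolled back
print(parser._match_text_seq("FROM", "NOPE"))  # False
print(parser._curr.text)                       # FROM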
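
_replace_lambda is what turns references to lambda parameters back into plain identifiers: any Column whose first part names a lambda variable is replaced by its identifier (or by a Dot chain when it was qualified). The effect is visible through the public API; FILTER here is just one function that accepts an arrow lambda in the default dialect:

from sqlglot import exp, parse_one

ast = parse_one("SELECT FILTER(xs, x -> x > 0)")
lam = ast.find(exp.Lambda)

print(lam.sql())  # x -> x > 0
# The left operand of the comparison is an Identifier rather than a
# Column, because _replace_lambda rewrote it after the body was parsed.
print(isinstance(lam.this.this, exp.Identifier))  # True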
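
_ensure_string_if_null wraps each value so that a NULL argument degrades to an empty string, which is how dialects whose CONCAT treats NULL as '' are accommodated. The same expression can be assembled with the helpers it calls:

from sqlglot import exp

value = exp.column("a")
wrapped = exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
print(wrapped.sql())  # COALESCE(CAST(a AS TEXT), '')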
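
_parse_comprehension backs the FOR ... IN ... IF form inside brackets, producing an exp.Comprehension that records the mapped expression, the loop variable, the iterated value, and an optional filter. A quick round trip, assuming a dialect with list comprehensions (DuckDB here):

from sqlglot import exp, parse_one

ast = parse_one("SELECT [x * 2 FOR x IN xs IF x > 0]", read="duckdb")
comp = ast.find(exp.Comprehension)

print(comp.sql("duckdb"))  # x * 2 FOR x IN xs IF x > 0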
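
All of the private machinery above is driven through a small public surface: tokenize the SQL, hand the tokens to parse, and inspect or re-render the resulting trees. A minimal end-to-end run with the base Tokenizer and Parser:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t WHERE b > 1"
tokens = Tokenizer().tokenize(sql)

# parse returns one syntax tree per statement; passing the original SQL
# string lets the parser produce more helpful error messages.
ast = Parser().parse(tokens, sql)[0]

assert isinstance(ast, exp.Select)
print(ast.sql())  # SELECT a FROM t WHERE b > 1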
self._parse_replace()} 585 ), 586 TokenType.NULL: lambda self, _: self.expression(exp.Null), 587 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 588 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 589 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 590 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 591 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 592 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 593 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 594 exp.National, this=token.text 595 ), 596 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 597 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 598 exp.RawString, this=token.text 599 ), 600 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 601 } 602 603 PLACEHOLDER_PARSERS = { 604 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 605 TokenType.PARAMETER: lambda self: self._parse_parameter(), 606 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 607 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 608 else None, 609 } 610 611 RANGE_PARSERS = { 612 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 613 TokenType.GLOB: binary_range_parser(exp.Glob), 614 TokenType.ILIKE: binary_range_parser(exp.ILike), 615 TokenType.IN: lambda self, this: self._parse_in(this), 616 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 617 TokenType.IS: lambda self, this: self._parse_is(this), 618 TokenType.LIKE: binary_range_parser(exp.Like), 619 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 620 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 621 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 622 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 623 } 624 625 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 626 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 627 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 628 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 629 "CHARACTER SET": lambda self: self._parse_character_set(), 630 "CHECKSUM": lambda self: self._parse_checksum(), 631 "CLUSTER BY": lambda self: self._parse_cluster(), 632 "CLUSTERED": lambda self: self._parse_clustered_by(), 633 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 634 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 635 "COPY": lambda self: self._parse_copy_property(), 636 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 637 "DEFINER": lambda self: self._parse_definer(), 638 "DETERMINISTIC": lambda self: self.expression( 639 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 640 ), 641 "DISTKEY": lambda self: self._parse_distkey(), 642 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 643 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 644 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 645 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 646 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 647 "FORMAT": lambda self: 
self._parse_property_assignment(exp.FileFormatProperty), 648 "FREESPACE": lambda self: self._parse_freespace(), 649 "HEAP": lambda self: self.expression(exp.HeapProperty), 650 "IMMUTABLE": lambda self: self.expression( 651 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 652 ), 653 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 654 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 655 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 656 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 657 "LIKE": lambda self: self._parse_create_like(), 658 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 659 "LOCK": lambda self: self._parse_locking(), 660 "LOCKING": lambda self: self._parse_locking(), 661 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 662 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 663 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 664 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 665 "NO": lambda self: self._parse_no_property(), 666 "ON": lambda self: self._parse_on_property(), 667 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 668 "PARTITION BY": lambda self: self._parse_partitioned_by(), 669 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 670 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 671 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 672 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 673 "RETURNS": lambda self: self._parse_returns(), 674 "ROW": lambda self: self._parse_row(), 675 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 676 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 677 "SETTINGS": lambda self: self.expression( 678 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 679 ), 680 "SORTKEY": lambda self: self._parse_sortkey(), 681 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 682 "STABLE": lambda self: self.expression( 683 exp.StabilityProperty, this=exp.Literal.string("STABLE") 684 ), 685 "STORED": lambda self: self._parse_stored(), 686 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 687 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 688 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 689 "TO": lambda self: self._parse_to_table(), 690 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 691 "TTL": lambda self: self._parse_ttl(), 692 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 693 "VOLATILE": lambda self: self._parse_volatile_property(), 694 "WITH": lambda self: self._parse_with_property(), 695 } 696 697 CONSTRAINT_PARSERS = { 698 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 699 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 700 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 701 "CHARACTER SET": lambda self: self.expression( 702 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 703 ), 704 "CHECK": lambda self: self.expression( 705 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 706 ), 707 "COLLATE": lambda self: self.expression( 708 exp.CollateColumnConstraint, this=self._parse_var() 709 ), 710 "COMMENT": lambda self: self.expression( 711 
exp.CommentColumnConstraint, this=self._parse_string() 712 ), 713 "COMPRESS": lambda self: self._parse_compress(), 714 "CLUSTERED": lambda self: self.expression( 715 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 716 ), 717 "NONCLUSTERED": lambda self: self.expression( 718 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 719 ), 720 "DEFAULT": lambda self: self.expression( 721 exp.DefaultColumnConstraint, this=self._parse_bitwise() 722 ), 723 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 724 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 725 "FORMAT": lambda self: self.expression( 726 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 727 ), 728 "GENERATED": lambda self: self._parse_generated_as_identity(), 729 "IDENTITY": lambda self: self._parse_auto_increment(), 730 "INLINE": lambda self: self._parse_inline(), 731 "LIKE": lambda self: self._parse_create_like(), 732 "NOT": lambda self: self._parse_not_constraint(), 733 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 734 "ON": lambda self: ( 735 self._match(TokenType.UPDATE) 736 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 737 ) 738 or self.expression(exp.OnProperty, this=self._parse_id_var()), 739 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 740 "PRIMARY KEY": lambda self: self._parse_primary_key(), 741 "REFERENCES": lambda self: self._parse_references(match=False), 742 "TITLE": lambda self: self.expression( 743 exp.TitleColumnConstraint, this=self._parse_var_or_string() 744 ), 745 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 746 "UNIQUE": lambda self: self._parse_unique(), 747 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 748 "WITH": lambda self: self.expression( 749 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 750 ), 751 } 752 753 ALTER_PARSERS = { 754 "ADD": lambda self: self._parse_alter_table_add(), 755 "ALTER": lambda self: self._parse_alter_table_alter(), 756 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 757 "DROP": lambda self: self._parse_alter_table_drop(), 758 "RENAME": lambda self: self._parse_alter_table_rename(), 759 } 760 761 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 762 763 NO_PAREN_FUNCTION_PARSERS = { 764 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 765 "CASE": lambda self: self._parse_case(), 766 "IF": lambda self: self._parse_if(), 767 "NEXT": lambda self: self._parse_next_value_for(), 768 } 769 770 INVALID_FUNC_NAME_TOKENS = { 771 TokenType.IDENTIFIER, 772 TokenType.STRING, 773 } 774 775 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 776 777 FUNCTION_PARSERS = { 778 "ANY_VALUE": lambda self: self._parse_any_value(), 779 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 780 "CONCAT": lambda self: self._parse_concat(), 781 "CONCAT_WS": lambda self: self._parse_concat_ws(), 782 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 783 "DECODE": lambda self: self._parse_decode(), 784 "EXTRACT": lambda self: self._parse_extract(), 785 "JSON_OBJECT": lambda self: self._parse_json_object(), 786 "LOG": lambda self: self._parse_logarithm(), 787 "MATCH": lambda self: self._parse_match_against(), 788 "OPENJSON": lambda self: self._parse_open_json(), 789 
"POSITION": lambda self: self._parse_position(), 790 "SAFE_CAST": lambda self: self._parse_cast(False), 791 "STRING_AGG": lambda self: self._parse_string_agg(), 792 "SUBSTRING": lambda self: self._parse_substring(), 793 "TRIM": lambda self: self._parse_trim(), 794 "TRY_CAST": lambda self: self._parse_cast(False), 795 "TRY_CONVERT": lambda self: self._parse_convert(False), 796 } 797 798 QUERY_MODIFIER_PARSERS = { 799 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 800 TokenType.WHERE: lambda self: ("where", self._parse_where()), 801 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 802 TokenType.HAVING: lambda self: ("having", self._parse_having()), 803 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 804 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 805 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 806 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 807 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 808 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 809 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 810 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 811 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 812 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 813 TokenType.CLUSTER_BY: lambda self: ( 814 "cluster", 815 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 816 ), 817 TokenType.DISTRIBUTE_BY: lambda self: ( 818 "distribute", 819 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 820 ), 821 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 822 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 823 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 824 } 825 826 SET_PARSERS = { 827 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 828 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 829 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 830 "TRANSACTION": lambda self: self._parse_set_transaction(), 831 } 832 833 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 834 835 TYPE_LITERAL_PARSERS = { 836 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 837 } 838 839 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 840 841 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 842 843 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 844 845 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 846 TRANSACTION_CHARACTERISTICS = { 847 "ISOLATION LEVEL REPEATABLE READ", 848 "ISOLATION LEVEL READ COMMITTED", 849 "ISOLATION LEVEL READ UNCOMMITTED", 850 "ISOLATION LEVEL SERIALIZABLE", 851 "READ WRITE", 852 "READ ONLY", 853 } 854 855 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 856 857 CLONE_KEYWORDS = {"CLONE", "COPY"} 858 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 859 860 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 861 862 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 863 864 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 865 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 866 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 867 868 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 869 
870 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 871 872 DISTINCT_TOKENS = {TokenType.DISTINCT} 873 874 NULL_TOKENS = {TokenType.NULL} 875 876 STRICT_CAST = True 877 878 # A NULL arg in CONCAT yields NULL by default 879 CONCAT_NULL_OUTPUTS_STRING = False 880 881 PREFIXED_PIVOT_COLUMNS = False 882 IDENTIFY_PIVOT_STRINGS = False 883 884 LOG_BASE_FIRST = True 885 LOG_DEFAULTS_TO_LN = False 886 887 # Whether or not ADD is present for each column added by ALTER TABLE 888 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 889 890 # Whether or not the table sample clause expects CSV syntax 891 TABLESAMPLE_CSV = False 892 893 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments. 894 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 895 896 __slots__ = ( 897 "error_level", 898 "error_message_context", 899 "max_errors", 900 "sql", 901 "errors", 902 "_tokens", 903 "_index", 904 "_curr", 905 "_next", 906 "_prev", 907 "_prev_comments", 908 "_tokenizer", 909 ) 910 911 # Autofilled 912 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 913 INDEX_OFFSET: int = 0 914 UNNEST_COLUMN_ONLY: bool = False 915 ALIAS_POST_TABLESAMPLE: bool = False 916 STRICT_STRING_CONCAT = False 917 SUPPORTS_USER_DEFINED_TYPES = True 918 NORMALIZE_FUNCTIONS = "upper" 919 NULL_ORDERING: str = "nulls_are_small" 920 SHOW_TRIE: t.Dict = {} 921 SET_TRIE: t.Dict = {} 922 FORMAT_MAPPING: t.Dict[str, str] = {} 923 FORMAT_TRIE: t.Dict = {} 924 TIME_MAPPING: t.Dict[str, str] = {} 925 TIME_TRIE: t.Dict = {} 926 927 def __init__( 928 self, 929 error_level: t.Optional[ErrorLevel] = None, 930 error_message_context: int = 100, 931 max_errors: int = 3, 932 ): 933 self.error_level = error_level or ErrorLevel.IMMEDIATE 934 self.error_message_context = error_message_context 935 self.max_errors = max_errors 936 self._tokenizer = self.TOKENIZER_CLASS() 937 self.reset() 938 939 def reset(self): 940 self.sql = "" 941 self.errors = [] 942 self._tokens = [] 943 self._index = 0 944 self._curr = None 945 self._next = None 946 self._prev = None 947 self._prev_comments = None 948 949 def parse( 950 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 951 ) -> t.List[t.Optional[exp.Expression]]: 952 """ 953 Parses a list of tokens and returns a list of syntax trees, one tree 954 per parsed SQL statement. 955 956 Args: 957 raw_tokens: The list of tokens. 958 sql: The original SQL string, used to produce helpful debug messages. 959 960 Returns: 961 The list of the produced syntax trees. 962 """ 963 return self._parse( 964 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 965 ) 966 967 def parse_into( 968 self, 969 expression_types: exp.IntoType, 970 raw_tokens: t.List[Token], 971 sql: t.Optional[str] = None, 972 ) -> t.List[t.Optional[exp.Expression]]: 973 """ 974 Parses a list of tokens into a given Expression type. If a collection of Expression 975 types is given instead, this method will try to parse the token list into each one 976 of them, stopping at the first for which the parsing succeeds. 977 978 Args: 979 expression_types: The expression type(s) to try and parse the token list into. 980 raw_tokens: The list of tokens. 981 sql: The original SQL string, used to produce helpful debug messages. 982 983 Returns: 984 The target Expression. 
985 """ 986 errors = [] 987 for expression_type in ensure_list(expression_types): 988 parser = self.EXPRESSION_PARSERS.get(expression_type) 989 if not parser: 990 raise TypeError(f"No parser registered for {expression_type}") 991 992 try: 993 return self._parse(parser, raw_tokens, sql) 994 except ParseError as e: 995 e.errors[0]["into_expression"] = expression_type 996 errors.append(e) 997 998 raise ParseError( 999 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1000 errors=merge_errors(errors), 1001 ) from errors[-1] 1002 1003 def _parse( 1004 self, 1005 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1006 raw_tokens: t.List[Token], 1007 sql: t.Optional[str] = None, 1008 ) -> t.List[t.Optional[exp.Expression]]: 1009 self.reset() 1010 self.sql = sql or "" 1011 1012 total = len(raw_tokens) 1013 chunks: t.List[t.List[Token]] = [[]] 1014 1015 for i, token in enumerate(raw_tokens): 1016 if token.token_type == TokenType.SEMICOLON: 1017 if i < total - 1: 1018 chunks.append([]) 1019 else: 1020 chunks[-1].append(token) 1021 1022 expressions = [] 1023 1024 for tokens in chunks: 1025 self._index = -1 1026 self._tokens = tokens 1027 self._advance() 1028 1029 expressions.append(parse_method(self)) 1030 1031 if self._index < len(self._tokens): 1032 self.raise_error("Invalid expression / Unexpected token") 1033 1034 self.check_errors() 1035 1036 return expressions 1037 1038 def check_errors(self) -> None: 1039 """Logs or raises any found errors, depending on the chosen error level setting.""" 1040 if self.error_level == ErrorLevel.WARN: 1041 for error in self.errors: 1042 logger.error(str(error)) 1043 elif self.error_level == ErrorLevel.RAISE and self.errors: 1044 raise ParseError( 1045 concat_messages(self.errors, self.max_errors), 1046 errors=merge_errors(self.errors), 1047 ) 1048 1049 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1050 """ 1051 Appends an error in the list of recorded errors or raises it, depending on the chosen 1052 error level setting. 1053 """ 1054 token = token or self._curr or self._prev or Token.string("") 1055 start = token.start 1056 end = token.end + 1 1057 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1058 highlight = self.sql[start:end] 1059 end_context = self.sql[end : end + self.error_message_context] 1060 1061 error = ParseError.new( 1062 f"{message}. Line {token.line}, Col: {token.col}.\n" 1063 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1064 description=message, 1065 line=token.line, 1066 col=token.col, 1067 start_context=start_context, 1068 highlight=highlight, 1069 end_context=end_context, 1070 ) 1071 1072 if self.error_level == ErrorLevel.IMMEDIATE: 1073 raise error 1074 1075 self.errors.append(error) 1076 1077 def expression( 1078 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1079 ) -> E: 1080 """ 1081 Creates a new, validated Expression. 1082 1083 Args: 1084 exp_class: The expression class to instantiate. 1085 comments: An optional list of comments to attach to the expression. 1086 kwargs: The arguments to set for the expression along with their respective values. 1087 1088 Returns: 1089 The target expression. 
1090 """ 1091 instance = exp_class(**kwargs) 1092 instance.add_comments(comments) if comments else self._add_comments(instance) 1093 return self.validate_expression(instance) 1094 1095 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1096 if expression and self._prev_comments: 1097 expression.add_comments(self._prev_comments) 1098 self._prev_comments = None 1099 1100 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1101 """ 1102 Validates an Expression, making sure that all its mandatory arguments are set. 1103 1104 Args: 1105 expression: The expression to validate. 1106 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1107 1108 Returns: 1109 The validated expression. 1110 """ 1111 if self.error_level != ErrorLevel.IGNORE: 1112 for error_message in expression.error_messages(args): 1113 self.raise_error(error_message) 1114 1115 return expression 1116 1117 def _find_sql(self, start: Token, end: Token) -> str: 1118 return self.sql[start.start : end.end + 1] 1119 1120 def _advance(self, times: int = 1) -> None: 1121 self._index += times 1122 self._curr = seq_get(self._tokens, self._index) 1123 self._next = seq_get(self._tokens, self._index + 1) 1124 1125 if self._index > 0: 1126 self._prev = self._tokens[self._index - 1] 1127 self._prev_comments = self._prev.comments 1128 else: 1129 self._prev = None 1130 self._prev_comments = None 1131 1132 def _retreat(self, index: int) -> None: 1133 if index != self._index: 1134 self._advance(index - self._index) 1135 1136 def _parse_command(self) -> exp.Command: 1137 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1138 1139 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1140 start = self._prev 1141 exists = self._parse_exists() if allow_exists else None 1142 1143 self._match(TokenType.ON) 1144 1145 kind = self._match_set(self.CREATABLES) and self._prev 1146 if not kind: 1147 return self._parse_as_command(start) 1148 1149 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1150 this = self._parse_user_defined_function(kind=kind.token_type) 1151 elif kind.token_type == TokenType.TABLE: 1152 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1153 elif kind.token_type == TokenType.COLUMN: 1154 this = self._parse_column() 1155 else: 1156 this = self._parse_id_var() 1157 1158 self._match(TokenType.IS) 1159 1160 return self.expression( 1161 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1162 ) 1163 1164 def _parse_to_table( 1165 self, 1166 ) -> exp.ToTableProperty: 1167 table = self._parse_table_parts(schema=True) 1168 return self.expression(exp.ToTableProperty, this=table) 1169 1170 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1171 def _parse_ttl(self) -> exp.Expression: 1172 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1173 this = self._parse_bitwise() 1174 1175 if self._match_text_seq("DELETE"): 1176 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1177 if self._match_text_seq("RECOMPRESS"): 1178 return self.expression( 1179 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1180 ) 1181 if self._match_text_seq("TO", "DISK"): 1182 return self.expression( 1183 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1184 ) 1185 if self._match_text_seq("TO", "VOLUME"): 1186 return self.expression( 1187 
exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1188 ) 1189 1190 return this 1191 1192 expressions = self._parse_csv(_parse_ttl_action) 1193 where = self._parse_where() 1194 group = self._parse_group() 1195 1196 aggregates = None 1197 if group and self._match(TokenType.SET): 1198 aggregates = self._parse_csv(self._parse_set_item) 1199 1200 return self.expression( 1201 exp.MergeTreeTTL, 1202 expressions=expressions, 1203 where=where, 1204 group=group, 1205 aggregates=aggregates, 1206 ) 1207 1208 def _parse_statement(self) -> t.Optional[exp.Expression]: 1209 if self._curr is None: 1210 return None 1211 1212 if self._match_set(self.STATEMENT_PARSERS): 1213 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1214 1215 if self._match_set(Tokenizer.COMMANDS): 1216 return self._parse_command() 1217 1218 expression = self._parse_expression() 1219 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1220 return self._parse_query_modifiers(expression) 1221 1222 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1223 start = self._prev 1224 temporary = self._match(TokenType.TEMPORARY) 1225 materialized = self._match_text_seq("MATERIALIZED") 1226 1227 kind = self._match_set(self.CREATABLES) and self._prev.text 1228 if not kind: 1229 return self._parse_as_command(start) 1230 1231 return self.expression( 1232 exp.Drop, 1233 comments=start.comments, 1234 exists=exists or self._parse_exists(), 1235 this=self._parse_table(schema=True), 1236 kind=kind, 1237 temporary=temporary, 1238 materialized=materialized, 1239 cascade=self._match_text_seq("CASCADE"), 1240 constraints=self._match_text_seq("CONSTRAINTS"), 1241 purge=self._match_text_seq("PURGE"), 1242 ) 1243 1244 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1245 return ( 1246 self._match_text_seq("IF") 1247 and (not not_ or self._match(TokenType.NOT)) 1248 and self._match(TokenType.EXISTS) 1249 ) 1250 1251 def _parse_create(self) -> exp.Create | exp.Command: 1252 # Note: this can't be None because we've matched a statement parser 1253 start = self._prev 1254 comments = self._prev_comments 1255 1256 replace = start.text.upper() == "REPLACE" or self._match_pair( 1257 TokenType.OR, TokenType.REPLACE 1258 ) 1259 unique = self._match(TokenType.UNIQUE) 1260 1261 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1262 self._advance() 1263 1264 properties = None 1265 create_token = self._match_set(self.CREATABLES) and self._prev 1266 1267 if not create_token: 1268 # exp.Properties.Location.POST_CREATE 1269 properties = self._parse_properties() 1270 create_token = self._match_set(self.CREATABLES) and self._prev 1271 1272 if not properties or not create_token: 1273 return self._parse_as_command(start) 1274 1275 exists = self._parse_exists(not_=True) 1276 this = None 1277 expression: t.Optional[exp.Expression] = None 1278 indexes = None 1279 no_schema_binding = None 1280 begin = None 1281 clone = None 1282 1283 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1284 nonlocal properties 1285 if properties and temp_props: 1286 properties.expressions.extend(temp_props.expressions) 1287 elif temp_props: 1288 properties = temp_props 1289 1290 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1291 this = self._parse_user_defined_function(kind=create_token.token_type) 1292 1293 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1294 extend_props(self._parse_properties()) 1295 1296 
self._match(TokenType.ALIAS) 1297 1298 if self._match(TokenType.COMMAND): 1299 expression = self._parse_as_command(self._prev) 1300 else: 1301 begin = self._match(TokenType.BEGIN) 1302 return_ = self._match_text_seq("RETURN") 1303 1304 if self._match(TokenType.STRING, advance=False): 1305 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1306 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1307 expression = self._parse_string() 1308 extend_props(self._parse_properties()) 1309 else: 1310 expression = self._parse_statement() 1311 1312 if return_: 1313 expression = self.expression(exp.Return, this=expression) 1314 elif create_token.token_type == TokenType.INDEX: 1315 this = self._parse_index(index=self._parse_id_var()) 1316 elif create_token.token_type in self.DB_CREATABLES: 1317 table_parts = self._parse_table_parts(schema=True) 1318 1319 # exp.Properties.Location.POST_NAME 1320 self._match(TokenType.COMMA) 1321 extend_props(self._parse_properties(before=True)) 1322 1323 this = self._parse_schema(this=table_parts) 1324 1325 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1326 extend_props(self._parse_properties()) 1327 1328 self._match(TokenType.ALIAS) 1329 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1330 # exp.Properties.Location.POST_ALIAS 1331 extend_props(self._parse_properties()) 1332 1333 expression = self._parse_ddl_select() 1334 1335 if create_token.token_type == TokenType.TABLE: 1336 # exp.Properties.Location.POST_EXPRESSION 1337 extend_props(self._parse_properties()) 1338 1339 indexes = [] 1340 while True: 1341 index = self._parse_index() 1342 1343 # exp.Properties.Location.POST_INDEX 1344 extend_props(self._parse_properties()) 1345 1346 if not index: 1347 break 1348 else: 1349 self._match(TokenType.COMMA) 1350 indexes.append(index) 1351 elif create_token.token_type == TokenType.VIEW: 1352 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1353 no_schema_binding = True 1354 1355 shallow = self._match_text_seq("SHALLOW") 1356 1357 if self._match_texts(self.CLONE_KEYWORDS): 1358 copy = self._prev.text.lower() == "copy" 1359 clone = self._parse_table(schema=True) 1360 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1361 clone_kind = ( 1362 self._match(TokenType.L_PAREN) 1363 and self._match_texts(self.CLONE_KINDS) 1364 and self._prev.text.upper() 1365 ) 1366 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1367 self._match(TokenType.R_PAREN) 1368 clone = self.expression( 1369 exp.Clone, 1370 this=clone, 1371 when=when, 1372 kind=clone_kind, 1373 shallow=shallow, 1374 expression=clone_expression, 1375 copy=copy, 1376 ) 1377 1378 return self.expression( 1379 exp.Create, 1380 comments=comments, 1381 this=this, 1382 kind=create_token.text, 1383 replace=replace, 1384 unique=unique, 1385 expression=expression, 1386 exists=exists, 1387 properties=properties, 1388 indexes=indexes, 1389 no_schema_binding=no_schema_binding, 1390 begin=begin, 1391 clone=clone, 1392 ) 1393 1394 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1395 # only used for teradata currently 1396 self._match(TokenType.COMMA) 1397 1398 kwargs = { 1399 "no": self._match_text_seq("NO"), 1400 "dual": self._match_text_seq("DUAL"), 1401 "before": self._match_text_seq("BEFORE"), 1402 "default": self._match_text_seq("DEFAULT"), 1403 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1404 or (self._match_text_seq("NOT", "LOCAL") and 
"NOT LOCAL"), 1405 "after": self._match_text_seq("AFTER"), 1406 "minimum": self._match_texts(("MIN", "MINIMUM")), 1407 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1408 } 1409 1410 if self._match_texts(self.PROPERTY_PARSERS): 1411 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1412 try: 1413 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1414 except TypeError: 1415 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1416 1417 return None 1418 1419 def _parse_property(self) -> t.Optional[exp.Expression]: 1420 if self._match_texts(self.PROPERTY_PARSERS): 1421 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1422 1423 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1424 return self._parse_character_set(default=True) 1425 1426 if self._match_text_seq("COMPOUND", "SORTKEY"): 1427 return self._parse_sortkey(compound=True) 1428 1429 if self._match_text_seq("SQL", "SECURITY"): 1430 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1431 1432 index = self._index 1433 key = self._parse_column() 1434 1435 if not self._match(TokenType.EQ): 1436 self._retreat(index) 1437 return None 1438 1439 return self.expression( 1440 exp.Property, 1441 this=key.to_dot() if isinstance(key, exp.Column) else key, 1442 value=self._parse_column() or self._parse_var(any_token=True), 1443 ) 1444 1445 def _parse_stored(self) -> exp.FileFormatProperty: 1446 self._match(TokenType.ALIAS) 1447 1448 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1449 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1450 1451 return self.expression( 1452 exp.FileFormatProperty, 1453 this=self.expression( 1454 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1455 ) 1456 if input_format or output_format 1457 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1458 ) 1459 1460 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1461 self._match(TokenType.EQ) 1462 self._match(TokenType.ALIAS) 1463 return self.expression(exp_class, this=self._parse_field()) 1464 1465 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1466 properties = [] 1467 while True: 1468 if before: 1469 prop = self._parse_property_before() 1470 else: 1471 prop = self._parse_property() 1472 1473 if not prop: 1474 break 1475 for p in ensure_list(prop): 1476 properties.append(p) 1477 1478 if properties: 1479 return self.expression(exp.Properties, expressions=properties) 1480 1481 return None 1482 1483 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1484 return self.expression( 1485 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1486 ) 1487 1488 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1489 if self._index >= 2: 1490 pre_volatile_token = self._tokens[self._index - 2] 1491 else: 1492 pre_volatile_token = None 1493 1494 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1495 return exp.VolatileProperty() 1496 1497 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1498 1499 def _parse_with_property( 1500 self, 1501 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1502 if self._match(TokenType.L_PAREN, advance=False): 1503 return self._parse_wrapped_csv(self._parse_property) 1504 1505 if self._match_text_seq("JOURNAL"): 1506 return 
self._parse_withjournaltable() 1507 1508 if self._match_text_seq("DATA"): 1509 return self._parse_withdata(no=False) 1510 elif self._match_text_seq("NO", "DATA"): 1511 return self._parse_withdata(no=True) 1512 1513 if not self._next: 1514 return None 1515 1516 return self._parse_withisolatedloading() 1517 1518 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1519 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1520 self._match(TokenType.EQ) 1521 1522 user = self._parse_id_var() 1523 self._match(TokenType.PARAMETER) 1524 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1525 1526 if not user or not host: 1527 return None 1528 1529 return exp.DefinerProperty(this=f"{user}@{host}") 1530 1531 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1532 self._match(TokenType.TABLE) 1533 self._match(TokenType.EQ) 1534 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1535 1536 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1537 return self.expression(exp.LogProperty, no=no) 1538 1539 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1540 return self.expression(exp.JournalProperty, **kwargs) 1541 1542 def _parse_checksum(self) -> exp.ChecksumProperty: 1543 self._match(TokenType.EQ) 1544 1545 on = None 1546 if self._match(TokenType.ON): 1547 on = True 1548 elif self._match_text_seq("OFF"): 1549 on = False 1550 1551 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1552 1553 def _parse_cluster(self) -> exp.Cluster: 1554 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1555 1556 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1557 self._match_text_seq("BY") 1558 1559 self._match_l_paren() 1560 expressions = self._parse_csv(self._parse_column) 1561 self._match_r_paren() 1562 1563 if self._match_text_seq("SORTED", "BY"): 1564 self._match_l_paren() 1565 sorted_by = self._parse_csv(self._parse_ordered) 1566 self._match_r_paren() 1567 else: 1568 sorted_by = None 1569 1570 self._match(TokenType.INTO) 1571 buckets = self._parse_number() 1572 self._match_text_seq("BUCKETS") 1573 1574 return self.expression( 1575 exp.ClusteredByProperty, 1576 expressions=expressions, 1577 sorted_by=sorted_by, 1578 buckets=buckets, 1579 ) 1580 1581 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1582 if not self._match_text_seq("GRANTS"): 1583 self._retreat(self._index - 1) 1584 return None 1585 1586 return self.expression(exp.CopyGrantsProperty) 1587 1588 def _parse_freespace(self) -> exp.FreespaceProperty: 1589 self._match(TokenType.EQ) 1590 return self.expression( 1591 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1592 ) 1593 1594 def _parse_mergeblockratio( 1595 self, no: bool = False, default: bool = False 1596 ) -> exp.MergeBlockRatioProperty: 1597 if self._match(TokenType.EQ): 1598 return self.expression( 1599 exp.MergeBlockRatioProperty, 1600 this=self._parse_number(), 1601 percent=self._match(TokenType.PERCENT), 1602 ) 1603 1604 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1605 1606 def _parse_datablocksize( 1607 self, 1608 default: t.Optional[bool] = None, 1609 minimum: t.Optional[bool] = None, 1610 maximum: t.Optional[bool] = None, 1611 ) -> exp.DataBlocksizeProperty: 1612 self._match(TokenType.EQ) 1613 size = self._parse_number() 1614 1615 units = None 1616 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 
1617 units = self._prev.text 1618 1619 return self.expression( 1620 exp.DataBlocksizeProperty, 1621 size=size, 1622 units=units, 1623 default=default, 1624 minimum=minimum, 1625 maximum=maximum, 1626 ) 1627 1628 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1629 self._match(TokenType.EQ) 1630 always = self._match_text_seq("ALWAYS") 1631 manual = self._match_text_seq("MANUAL") 1632 never = self._match_text_seq("NEVER") 1633 default = self._match_text_seq("DEFAULT") 1634 1635 autotemp = None 1636 if self._match_text_seq("AUTOTEMP"): 1637 autotemp = self._parse_schema() 1638 1639 return self.expression( 1640 exp.BlockCompressionProperty, 1641 always=always, 1642 manual=manual, 1643 never=never, 1644 default=default, 1645 autotemp=autotemp, 1646 ) 1647 1648 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1649 no = self._match_text_seq("NO") 1650 concurrent = self._match_text_seq("CONCURRENT") 1651 self._match_text_seq("ISOLATED", "LOADING") 1652 for_all = self._match_text_seq("FOR", "ALL") 1653 for_insert = self._match_text_seq("FOR", "INSERT") 1654 for_none = self._match_text_seq("FOR", "NONE") 1655 return self.expression( 1656 exp.IsolatedLoadingProperty, 1657 no=no, 1658 concurrent=concurrent, 1659 for_all=for_all, 1660 for_insert=for_insert, 1661 for_none=for_none, 1662 ) 1663 1664 def _parse_locking(self) -> exp.LockingProperty: 1665 if self._match(TokenType.TABLE): 1666 kind = "TABLE" 1667 elif self._match(TokenType.VIEW): 1668 kind = "VIEW" 1669 elif self._match(TokenType.ROW): 1670 kind = "ROW" 1671 elif self._match_text_seq("DATABASE"): 1672 kind = "DATABASE" 1673 else: 1674 kind = None 1675 1676 if kind in ("DATABASE", "TABLE", "VIEW"): 1677 this = self._parse_table_parts() 1678 else: 1679 this = None 1680 1681 if self._match(TokenType.FOR): 1682 for_or_in = "FOR" 1683 elif self._match(TokenType.IN): 1684 for_or_in = "IN" 1685 else: 1686 for_or_in = None 1687 1688 if self._match_text_seq("ACCESS"): 1689 lock_type = "ACCESS" 1690 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1691 lock_type = "EXCLUSIVE" 1692 elif self._match_text_seq("SHARE"): 1693 lock_type = "SHARE" 1694 elif self._match_text_seq("READ"): 1695 lock_type = "READ" 1696 elif self._match_text_seq("WRITE"): 1697 lock_type = "WRITE" 1698 elif self._match_text_seq("CHECKSUM"): 1699 lock_type = "CHECKSUM" 1700 else: 1701 lock_type = None 1702 1703 override = self._match_text_seq("OVERRIDE") 1704 1705 return self.expression( 1706 exp.LockingProperty, 1707 this=this, 1708 kind=kind, 1709 for_or_in=for_or_in, 1710 lock_type=lock_type, 1711 override=override, 1712 ) 1713 1714 def _parse_partition_by(self) -> t.List[exp.Expression]: 1715 if self._match(TokenType.PARTITION_BY): 1716 return self._parse_csv(self._parse_conjunction) 1717 return [] 1718 1719 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1720 self._match(TokenType.EQ) 1721 return self.expression( 1722 exp.PartitionedByProperty, 1723 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1724 ) 1725 1726 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1727 if self._match_text_seq("AND", "STATISTICS"): 1728 statistics = True 1729 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1730 statistics = False 1731 else: 1732 statistics = None 1733 1734 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1735 1736 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1737 if self._match_text_seq("PRIMARY", "INDEX"): 1738 return 
exp.NoPrimaryIndexProperty() 1739 return None 1740 1741 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1742 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1743 return exp.OnCommitProperty() 1744 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1745 return exp.OnCommitProperty(delete=True) 1746 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1747 1748 def _parse_distkey(self) -> exp.DistKeyProperty: 1749 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1750 1751 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1752 table = self._parse_table(schema=True) 1753 1754 options = [] 1755 while self._match_texts(("INCLUDING", "EXCLUDING")): 1756 this = self._prev.text.upper() 1757 1758 id_var = self._parse_id_var() 1759 if not id_var: 1760 return None 1761 1762 options.append( 1763 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1764 ) 1765 1766 return self.expression(exp.LikeProperty, this=table, expressions=options) 1767 1768 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1769 return self.expression( 1770 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1771 ) 1772 1773 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1774 self._match(TokenType.EQ) 1775 return self.expression( 1776 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1777 ) 1778 1779 def _parse_returns(self) -> exp.ReturnsProperty: 1780 value: t.Optional[exp.Expression] 1781 is_table = self._match(TokenType.TABLE) 1782 1783 if is_table: 1784 if self._match(TokenType.LT): 1785 value = self.expression( 1786 exp.Schema, 1787 this="TABLE", 1788 expressions=self._parse_csv(self._parse_struct_types), 1789 ) 1790 if not self._match(TokenType.GT): 1791 self.raise_error("Expecting >") 1792 else: 1793 value = self._parse_schema(exp.var("TABLE")) 1794 else: 1795 value = self._parse_types() 1796 1797 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1798 1799 def _parse_describe(self) -> exp.Describe: 1800 kind = self._match_set(self.CREATABLES) and self._prev.text 1801 this = self._parse_table(schema=True) 1802 properties = self._parse_properties() 1803 expressions = properties.expressions if properties else None 1804 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1805 1806 def _parse_insert(self) -> exp.Insert: 1807 comments = ensure_list(self._prev_comments) 1808 overwrite = self._match(TokenType.OVERWRITE) 1809 ignore = self._match(TokenType.IGNORE) 1810 local = self._match_text_seq("LOCAL") 1811 alternative = None 1812 1813 if self._match_text_seq("DIRECTORY"): 1814 this: t.Optional[exp.Expression] = self.expression( 1815 exp.Directory, 1816 this=self._parse_var_or_string(), 1817 local=local, 1818 row_format=self._parse_row_format(match_row=True), 1819 ) 1820 else: 1821 if self._match(TokenType.OR): 1822 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1823 1824 self._match(TokenType.INTO) 1825 comments += ensure_list(self._prev_comments) 1826 self._match(TokenType.TABLE) 1827 this = self._parse_table(schema=True) 1828 1829 returning = self._parse_returning() 1830 1831 return self.expression( 1832 exp.Insert, 1833 comments=comments, 1834 this=this, 1835 by_name=self._match_text_seq("BY", "NAME"), 1836 exists=self._parse_exists(), 1837 partition=self._parse_partition(), 1838 
    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

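    # Illustrative sketch: _parse_on_conflict above covers both the PostgreSQL
    # ON CONFLICT and MySQL ON DUPLICATE KEY forms, while _parse_row_format covers
    # Hive's ROW FORMAT clauses:
    #   >>> import sqlglot
    #   >>> ins = sqlglot.parse_one("INSERT INTO t VALUES (1) ON CONFLICT DO NOTHING", read="postgres")
    #   >>> ddl = sqlglot.parse_one(
    #   ...     "CREATE TABLE t (a INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','", read="hive"
    #   ... )
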
    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

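    # Illustrative sketch: the "tables" branch of _parse_delete above handles
    # MySQL's multiple-table syntax:
    #   >>> import sqlglot
    #   >>> del_ = sqlglot.parse_one("DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id", read="mysql")
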
    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

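    # Illustrative sketch: the leading-FROM branch of _parse_select above expands
    # DuckDB's "FROM x" shorthand into a regular SELECT:
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("FROM tbl", read="duckdb").sql()
    #   'SELECT * FROM tbl'
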
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

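    # Illustrative sketch: _parse_with above collects the CTE list that
    # _parse_select later attaches to the statement that follows it:
    #   >>> import sqlglot
    #   >>> select = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
    #   >>> select.args["with"].expressions[0].alias
    #   'c'
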
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

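    # Illustrative sketch: the PATTERN balancing loop above captures the raw
    # pattern text verbatim (Snowflake-style MATCH_RECOGNIZE):
    #   >>> import sqlglot
    #   >>> mr = sqlglot.parse_one(
    #   ...     "SELECT * FROM t MATCH_RECOGNIZE (PARTITION BY a ORDER BY b "
    #   ...     "PATTERN (x) DEFINE x AS x.a > 0)",
    #   ...     read="snowflake",
    #   ... )
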
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        opclass = self._parse_var(any_token=True)
        if opclass:
            return self.expression(exp.Opclass, this=this, expression=opclass)

        return this

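    # Illustrative sketch: join method/side/kind tokens surface as plain-text args
    # on the exp.Join node built above:
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.x = b.x").find(exp.Join).args["side"]
    #   'LEFT'
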
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

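    # Illustrative sketch: dotted names parsed by _parse_table_parts above fill the
    # catalog/db slots of exp.Table:
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
    #   >>> tbl.catalog, tbl.db
    #   ('c', 'd')
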
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

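    # Illustrative sketch: _parse_unnest above consumes the wrapped expression list
    # plus an optional table alias:
    #   >>> import sqlglot
    #   >>> un = sqlglot.parse_one("SELECT * FROM UNNEST(ARRAY[1, 2]) AS t(x)", read="presto")
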
    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

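    # Illustrative sketch: _parse_simplified_pivot above covers DuckDB's standalone
    # PIVOT statement:
    #   >>> import sqlglot
    #   >>> piv = sqlglot.parse_one("PIVOT cities ON year USING SUM(population)", read="duckdb")
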
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

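    # Illustrative sketch: the _parse_group loop above folds plain expressions and
    # ROLLUP/CUBE/GROUPING SETS into a single exp.Group:
    #   >>> import sqlglot
    #   >>> grp = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)").args["group"]
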
    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

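    # Illustrative sketch: an explicit NULLS FIRST/LAST overrides the dialect's
    # NULL_ORDERING default in _parse_ordered above:
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> o = sqlglot.parse_one("SELECT x FROM t ORDER BY x DESC NULLS FIRST").find(exp.Ordered)
    #   >>> o.args["desc"], o.args["nulls_first"]
    #   (True, True)
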
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

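    # Illustrative sketch: in _parse_set_operations above, a set operation is
    # distinct unless ALL is given:
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT 1 UNION SELECT 2").args["distinct"]
    #   True
    #   >>> sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2").args["distinct"]
    #   False
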
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

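    # Illustrative sketch: _parse_is above maps IS [NOT] DISTINCT FROM onto the
    # null-safe comparison nodes:
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT a IS NOT DISTINCT FROM b").find(exp.NullSafeEQ) is not None
    #   True
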
a "window side") 3241 unit = None 3242 self._retreat(self._index - 1) 3243 3244 this = exp.Literal.string(parts[0]) 3245 unit = self.expression(exp.Var, this=parts[1]) 3246 3247 return self.expression(exp.Interval, this=this, unit=unit) 3248 3249 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3250 this = self._parse_term() 3251 3252 while True: 3253 if self._match_set(self.BITWISE): 3254 this = self.expression( 3255 self.BITWISE[self._prev.token_type], 3256 this=this, 3257 expression=self._parse_term(), 3258 ) 3259 elif self._match(TokenType.DQMARK): 3260 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3261 elif self._match_pair(TokenType.LT, TokenType.LT): 3262 this = self.expression( 3263 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3264 ) 3265 elif self._match_pair(TokenType.GT, TokenType.GT): 3266 this = self.expression( 3267 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3268 ) 3269 else: 3270 break 3271 3272 return this 3273 3274 def _parse_term(self) -> t.Optional[exp.Expression]: 3275 return self._parse_tokens(self._parse_factor, self.TERM) 3276 3277 def _parse_factor(self) -> t.Optional[exp.Expression]: 3278 return self._parse_tokens(self._parse_unary, self.FACTOR) 3279 3280 def _parse_unary(self) -> t.Optional[exp.Expression]: 3281 if self._match_set(self.UNARY_PARSERS): 3282 return self.UNARY_PARSERS[self._prev.token_type](self) 3283 return self._parse_at_time_zone(self._parse_type()) 3284 3285 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3286 interval = parse_interval and self._parse_interval() 3287 if interval: 3288 return interval 3289 3290 index = self._index 3291 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3292 this = self._parse_column() 3293 3294 if data_type: 3295 if isinstance(this, exp.Literal): 3296 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3297 if parser: 3298 return parser(self, this, data_type) 3299 return self.expression(exp.Cast, this=this, to=data_type) 3300 if not data_type.expressions: 3301 self._retreat(index) 3302 return self._parse_column() 3303 return self._parse_column_ops(data_type) 3304 3305 return this and self._parse_column_ops(this) 3306 3307 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3308 this = self._parse_type() 3309 if not this: 3310 return None 3311 3312 return self.expression( 3313 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3314 ) 3315 3316 def _parse_types( 3317 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3318 ) -> t.Optional[exp.Expression]: 3319 index = self._index 3320 3321 prefix = self._match_text_seq("SYSUDTLIB", ".") 3322 3323 if not self._match_set(self.TYPE_TOKENS): 3324 identifier = allow_identifiers and self._parse_id_var( 3325 any_token=False, tokens=(TokenType.VAR,) 3326 ) 3327 3328 if identifier: 3329 tokens = self._tokenizer.tokenize(identifier.name) 3330 3331 if len(tokens) != 1: 3332 self.raise_error("Unexpected identifier", self._prev) 3333 3334 if tokens[0].token_type in self.TYPE_TOKENS: 3335 self._prev = tokens[0] 3336 elif self.SUPPORTS_USER_DEFINED_TYPES: 3337 type_name = identifier.name 3338 3339 while self._match(TokenType.DOT): 3340 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3341 3342 return exp.DataType.build(type_name, udt=True) 3343 else: 3344 return None 3345 else: 3346 return None 3347 3348 type_token = self._prev.token_type 3349 3350 if type_token 
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

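    # Illustrative sketch: parenthesized type arguments become expressions on the
    # exp.DataType built by _parse_types above:
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT CAST(x AS DECIMAL(10, 2))").find(exp.Cast).to.sql()
    #   'DECIMAL(10, 2)'
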
    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return this

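    # Illustrative sketch: the DCOLON column operator handled in _parse_column_ops
    # above produces a cast:
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> isinstance(sqlglot.parse_one("SELECT x::INT").selects[0], exp.Cast)
    #   True
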
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

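    # Illustrative sketch: names with no registered parser fall through to
    # exp.Anonymous in _parse_function above:
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT MY_UDF(1, 2)").find(exp.Anonymous).name
    #   'MY_UDF'
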
self.expression(exp.SessionParameter, this=this, kind=kind) 3676 3677 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3678 index = self._index 3679 3680 if self._match(TokenType.L_PAREN): 3681 expressions = t.cast( 3682 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3683 ) 3684 3685 if not self._match(TokenType.R_PAREN): 3686 self._retreat(index) 3687 else: 3688 expressions = [self._parse_id_var()] 3689 3690 if self._match_set(self.LAMBDAS): 3691 return self.LAMBDAS[self._prev.token_type](self, expressions) 3692 3693 self._retreat(index) 3694 3695 this: t.Optional[exp.Expression] 3696 3697 if self._match(TokenType.DISTINCT): 3698 this = self.expression( 3699 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3700 ) 3701 else: 3702 this = self._parse_select_or_expression(alias=alias) 3703 3704 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3705 3706 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3707 index = self._index 3708 3709 if not self.errors: 3710 try: 3711 if self._parse_select(nested=True): 3712 return this 3713 except ParseError: 3714 pass 3715 finally: 3716 self.errors.clear() 3717 self._retreat(index) 3718 3719 if not self._match(TokenType.L_PAREN): 3720 return this 3721 3722 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3723 3724 self._match_r_paren() 3725 return self.expression(exp.Schema, this=this, expressions=args) 3726 3727 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3728 return self._parse_column_def(self._parse_field(any_token=True)) 3729 3730 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3731 # column defs are not really columns, they're identifiers 3732 if isinstance(this, exp.Column): 3733 this = this.this 3734 3735 kind = self._parse_types(schema=True) 3736 3737 if self._match_text_seq("FOR", "ORDINALITY"): 3738 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3739 3740 constraints: t.List[exp.Expression] = [] 3741 3742 if not kind and self._match(TokenType.ALIAS): 3743 constraints.append( 3744 self.expression( 3745 exp.ComputedColumnConstraint, 3746 this=self._parse_conjunction(), 3747 persisted=self._match_text_seq("PERSISTED"), 3748 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3749 ) 3750 ) 3751 3752 while True: 3753 constraint = self._parse_column_constraint() 3754 if not constraint: 3755 break 3756 constraints.append(constraint) 3757 3758 if not kind and not constraints: 3759 return this 3760 3761 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3762 3763 def _parse_auto_increment( 3764 self, 3765 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3766 start = None 3767 increment = None 3768 3769 if self._match(TokenType.L_PAREN, advance=False): 3770 args = self._parse_wrapped_csv(self._parse_bitwise) 3771 start = seq_get(args, 0) 3772 increment = seq_get(args, 1) 3773 elif self._match_text_seq("START"): 3774 start = self._parse_bitwise() 3775 self._match_text_seq("INCREMENT") 3776 increment = self._parse_bitwise() 3777 3778 if start and increment: 3779 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3780 3781 return exp.AutoIncrementColumnConstraint() 3782 3783 def _parse_compress(self) -> exp.CompressColumnConstraint: 3784 if self._match(TokenType.L_PAREN, advance=False): 3785 
return self.expression( 3786 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3787 ) 3788 3789 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3790 3791 def _parse_generated_as_identity( 3792 self, 3793 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint: 3794 if self._match_text_seq("BY", "DEFAULT"): 3795 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3796 this = self.expression( 3797 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3798 ) 3799 else: 3800 self._match_text_seq("ALWAYS") 3801 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3802 3803 self._match(TokenType.ALIAS) 3804 identity = self._match_text_seq("IDENTITY") 3805 3806 if self._match(TokenType.L_PAREN): 3807 if self._match(TokenType.START_WITH): 3808 this.set("start", self._parse_bitwise()) 3809 if self._match_text_seq("INCREMENT", "BY"): 3810 this.set("increment", self._parse_bitwise()) 3811 if self._match_text_seq("MINVALUE"): 3812 this.set("minvalue", self._parse_bitwise()) 3813 if self._match_text_seq("MAXVALUE"): 3814 this.set("maxvalue", self._parse_bitwise()) 3815 3816 if self._match_text_seq("CYCLE"): 3817 this.set("cycle", True) 3818 elif self._match_text_seq("NO", "CYCLE"): 3819 this.set("cycle", False) 3820 3821 if not identity: 3822 this.set("expression", self._parse_bitwise()) 3823 3824 self._match_r_paren() 3825 3826 return this 3827 3828 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3829 self._match_text_seq("LENGTH") 3830 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3831 3832 def _parse_not_constraint( 3833 self, 3834 ) -> t.Optional[exp.Expression]: 3835 if self._match_text_seq("NULL"): 3836 return self.expression(exp.NotNullColumnConstraint) 3837 if self._match_text_seq("CASESPECIFIC"): 3838 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3839 if self._match_text_seq("FOR", "REPLICATION"): 3840 return self.expression(exp.NotForReplicationColumnConstraint) 3841 return None 3842 3843 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3844 if self._match(TokenType.CONSTRAINT): 3845 this = self._parse_id_var() 3846 else: 3847 this = None 3848 3849 if self._match_texts(self.CONSTRAINT_PARSERS): 3850 return self.expression( 3851 exp.ColumnConstraint, 3852 this=this, 3853 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3854 ) 3855 3856 return this 3857 3858 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3859 if not self._match(TokenType.CONSTRAINT): 3860 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3861 3862 this = self._parse_id_var() 3863 expressions = [] 3864 3865 while True: 3866 constraint = self._parse_unnamed_constraint() or self._parse_function() 3867 if not constraint: 3868 break 3869 expressions.append(constraint) 3870 3871 return self.expression(exp.Constraint, this=this, expressions=expressions) 3872 3873 def _parse_unnamed_constraint( 3874 self, constraints: t.Optional[t.Collection[str]] = None 3875 ) -> t.Optional[exp.Expression]: 3876 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3877 return None 3878 3879 constraint = self._prev.text.upper() 3880 if constraint not in self.CONSTRAINT_PARSERS: 3881 self.raise_error(f"No parser found for schema constraint {constraint}.") 3882 3883 return self.CONSTRAINT_PARSERS[constraint](self) 3884 3885 def _parse_unique(self) -> 
exp.UniqueColumnConstraint: 3886 self._match_text_seq("KEY") 3887 return self.expression( 3888 exp.UniqueColumnConstraint, 3889 this=self._parse_schema(self._parse_id_var(any_token=False)), 3890 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3891 ) 3892 3893 def _parse_key_constraint_options(self) -> t.List[str]: 3894 options = [] 3895 while True: 3896 if not self._curr: 3897 break 3898 3899 if self._match(TokenType.ON): 3900 action = None 3901 on = self._advance_any() and self._prev.text 3902 3903 if self._match_text_seq("NO", "ACTION"): 3904 action = "NO ACTION" 3905 elif self._match_text_seq("CASCADE"): 3906 action = "CASCADE" 3907 elif self._match_text_seq("RESTRICT"): 3908 action = "RESTRICT" 3909 elif self._match_pair(TokenType.SET, TokenType.NULL): 3910 action = "SET NULL" 3911 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3912 action = "SET DEFAULT" 3913 else: 3914 self.raise_error("Invalid key constraint") 3915 3916 options.append(f"ON {on} {action}") 3917 elif self._match_text_seq("NOT", "ENFORCED"): 3918 options.append("NOT ENFORCED") 3919 elif self._match_text_seq("DEFERRABLE"): 3920 options.append("DEFERRABLE") 3921 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3922 options.append("INITIALLY DEFERRED") 3923 elif self._match_text_seq("NORELY"): 3924 options.append("NORELY") 3925 elif self._match_text_seq("MATCH", "FULL"): 3926 options.append("MATCH FULL") 3927 else: 3928 break 3929 3930 return options 3931 3932 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3933 if match and not self._match(TokenType.REFERENCES): 3934 return None 3935 3936 expressions = None 3937 this = self._parse_table(schema=True) 3938 options = self._parse_key_constraint_options() 3939 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3940 3941 def _parse_foreign_key(self) -> exp.ForeignKey: 3942 expressions = self._parse_wrapped_id_vars() 3943 reference = self._parse_references() 3944 options = {} 3945 3946 while self._match(TokenType.ON): 3947 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3948 self.raise_error("Expected DELETE or UPDATE") 3949 3950 kind = self._prev.text.lower() 3951 3952 if self._match_text_seq("NO", "ACTION"): 3953 action = "NO ACTION" 3954 elif self._match(TokenType.SET): 3955 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3956 action = "SET " + self._prev.text.upper() 3957 else: 3958 self._advance() 3959 action = self._prev.text.upper() 3960 3961 options[kind] = action 3962 3963 return self.expression( 3964 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3965 ) 3966 3967 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 3968 return self._parse_field() 3969 3970 def _parse_primary_key( 3971 self, wrapped_optional: bool = False, in_props: bool = False 3972 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3973 desc = ( 3974 self._match_set((TokenType.ASC, TokenType.DESC)) 3975 and self._prev.token_type == TokenType.DESC 3976 ) 3977 3978 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3979 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3980 3981 expressions = self._parse_wrapped_csv( 3982 self._parse_primary_key_part, optional=wrapped_optional 3983 ) 3984 options = self._parse_key_constraint_options() 3985 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3986 3987 def _parse_bracket(self, this: 
t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3988 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3989 return this 3990 3991 bracket_kind = self._prev.token_type 3992 3993 if self._match(TokenType.COLON): 3994 expressions: t.List[exp.Expression] = [ 3995 self.expression(exp.Slice, expression=self._parse_conjunction()) 3996 ] 3997 else: 3998 expressions = self._parse_csv( 3999 lambda: self._parse_slice( 4000 self._parse_alias(self._parse_conjunction(), explicit=True) 4001 ) 4002 ) 4003 4004 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4005 if bracket_kind == TokenType.L_BRACE: 4006 this = self.expression(exp.Struct, expressions=expressions) 4007 elif not this or this.name.upper() == "ARRAY": 4008 this = self.expression(exp.Array, expressions=expressions) 4009 else: 4010 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4011 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4012 4013 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4014 self.raise_error("Expected ]") 4015 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4016 self.raise_error("Expected }") 4017 4018 self._add_comments(this) 4019 return self._parse_bracket(this) 4020 4021 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4022 if self._match(TokenType.COLON): 4023 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4024 return this 4025 4026 def _parse_case(self) -> t.Optional[exp.Expression]: 4027 ifs = [] 4028 default = None 4029 4030 comments = self._prev_comments 4031 expression = self._parse_conjunction() 4032 4033 while self._match(TokenType.WHEN): 4034 this = self._parse_conjunction() 4035 self._match(TokenType.THEN) 4036 then = self._parse_conjunction() 4037 ifs.append(self.expression(exp.If, this=this, true=then)) 4038 4039 if self._match(TokenType.ELSE): 4040 default = self._parse_conjunction() 4041 4042 if not self._match(TokenType.END): 4043 self.raise_error("Expected END after CASE", self._prev) 4044 4045 return self._parse_window( 4046 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4047 ) 4048 4049 def _parse_if(self) -> t.Optional[exp.Expression]: 4050 if self._match(TokenType.L_PAREN): 4051 args = self._parse_csv(self._parse_conjunction) 4052 this = self.validate_expression(exp.If.from_arg_list(args), args) 4053 self._match_r_paren() 4054 else: 4055 index = self._index - 1 4056 condition = self._parse_conjunction() 4057 4058 if not condition: 4059 self._retreat(index) 4060 return None 4061 4062 self._match(TokenType.THEN) 4063 true = self._parse_conjunction() 4064 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4065 self._match(TokenType.END) 4066 this = self.expression(exp.If, this=condition, true=true, false=false) 4067 4068 return self._parse_window(this) 4069 4070 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4071 if not self._match_text_seq("VALUE", "FOR"): 4072 self._retreat(self._index - 1) 4073 return None 4074 4075 return self.expression( 4076 exp.NextValueFor, 4077 this=self._parse_column(), 4078 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4079 ) 4080 4081 def _parse_extract(self) -> exp.Extract: 4082 this = self._parse_function() or self._parse_var() or self._parse_type() 4083 4084 if self._match(TokenType.FROM): 4085 return self.expression(exp.Extract, 
this=this, expression=self._parse_bitwise()) 4086 4087 if not self._match(TokenType.COMMA): 4088 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4089 4090 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4091 4092 def _parse_any_value(self) -> exp.AnyValue: 4093 this = self._parse_lambda() 4094 is_max = None 4095 having = None 4096 4097 if self._match(TokenType.HAVING): 4098 self._match_texts(("MAX", "MIN")) 4099 is_max = self._prev.text == "MAX" 4100 having = self._parse_column() 4101 4102 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4103 4104 def _parse_cast(self, strict: bool) -> exp.Expression: 4105 this = self._parse_conjunction() 4106 4107 if not self._match(TokenType.ALIAS): 4108 if self._match(TokenType.COMMA): 4109 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4110 4111 self.raise_error("Expected AS after CAST") 4112 4113 fmt = None 4114 to = self._parse_types() 4115 4116 if not to: 4117 self.raise_error("Expected TYPE after CAST") 4118 elif isinstance(to, exp.Identifier): 4119 to = exp.DataType.build(to.name, udt=True) 4120 elif to.this == exp.DataType.Type.CHAR: 4121 if self._match(TokenType.CHARACTER_SET): 4122 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4123 elif self._match(TokenType.FORMAT): 4124 fmt_string = self._parse_string() 4125 fmt = self._parse_at_time_zone(fmt_string) 4126 4127 if to.this in exp.DataType.TEMPORAL_TYPES: 4128 this = self.expression( 4129 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4130 this=this, 4131 format=exp.Literal.string( 4132 format_time( 4133 fmt_string.this if fmt_string else "", 4134 self.FORMAT_MAPPING or self.TIME_MAPPING, 4135 self.FORMAT_TRIE or self.TIME_TRIE, 4136 ) 4137 ), 4138 ) 4139 4140 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4141 this.set("zone", fmt.args["zone"]) 4142 4143 return this 4144 4145 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 4146 4147 def _parse_concat(self) -> t.Optional[exp.Expression]: 4148 args = self._parse_csv(self._parse_conjunction) 4149 if self.CONCAT_NULL_OUTPUTS_STRING: 4150 args = self._ensure_string_if_null(args) 4151 4152 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4153 # we find such a call we replace it with its argument. 
4154 if len(args) == 1: 4155 return args[0] 4156 4157 return self.expression( 4158 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4159 ) 4160 4161 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4162 args = self._parse_csv(self._parse_conjunction) 4163 if len(args) < 2: 4164 return self.expression(exp.ConcatWs, expressions=args) 4165 delim, *values = args 4166 if self.CONCAT_NULL_OUTPUTS_STRING: 4167 values = self._ensure_string_if_null(values) 4168 4169 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4170 4171 def _parse_string_agg(self) -> exp.Expression: 4172 if self._match(TokenType.DISTINCT): 4173 args: t.List[t.Optional[exp.Expression]] = [ 4174 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4175 ] 4176 if self._match(TokenType.COMMA): 4177 args.extend(self._parse_csv(self._parse_conjunction)) 4178 else: 4179 args = self._parse_csv(self._parse_conjunction) # type: ignore 4180 4181 index = self._index 4182 if not self._match(TokenType.R_PAREN) and args: 4183 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4184 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4185 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4186 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4187 4188 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4189 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4190 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4191 if not self._match_text_seq("WITHIN", "GROUP"): 4192 self._retreat(index) 4193 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4194 4195 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4196 order = self._parse_order(this=seq_get(args, 0)) 4197 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4198 4199 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4200 this = self._parse_bitwise() 4201 4202 if self._match(TokenType.USING): 4203 to: t.Optional[exp.Expression] = self.expression( 4204 exp.CharacterSet, this=self._parse_var() 4205 ) 4206 elif self._match(TokenType.COMMA): 4207 to = self._parse_types() 4208 else: 4209 to = None 4210 4211 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4212 4213 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4214 """ 4215 There are generally two variants of the DECODE function: 4216 4217 - DECODE(bin, charset) 4218 - DECODE(expression, search, result [, search, result] ... [, default]) 4219 4220 The second variant will always be parsed into a CASE expression. Note that NULL 4221 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4222 instead of relying on pattern matching. 
4223 """ 4224 args = self._parse_csv(self._parse_conjunction) 4225 4226 if len(args) < 3: 4227 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4228 4229 expression, *expressions = args 4230 if not expression: 4231 return None 4232 4233 ifs = [] 4234 for search, result in zip(expressions[::2], expressions[1::2]): 4235 if not search or not result: 4236 return None 4237 4238 if isinstance(search, exp.Literal): 4239 ifs.append( 4240 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4241 ) 4242 elif isinstance(search, exp.Null): 4243 ifs.append( 4244 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4245 ) 4246 else: 4247 cond = exp.or_( 4248 exp.EQ(this=expression.copy(), expression=search), 4249 exp.and_( 4250 exp.Is(this=expression.copy(), expression=exp.Null()), 4251 exp.Is(this=search.copy(), expression=exp.Null()), 4252 copy=False, 4253 ), 4254 copy=False, 4255 ) 4256 ifs.append(exp.If(this=cond, true=result)) 4257 4258 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4259 4260 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4261 self._match_text_seq("KEY") 4262 key = self._parse_column() 4263 self._match_set((TokenType.COLON, TokenType.COMMA)) 4264 self._match_text_seq("VALUE") 4265 value = self._parse_bitwise() 4266 4267 if not key and not value: 4268 return None 4269 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4270 4271 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4272 if not this or not self._match_text_seq("FORMAT", "JSON"): 4273 return this 4274 4275 return self.expression(exp.FormatJson, this=this) 4276 4277 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4278 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4279 for value in values: 4280 if self._match_text_seq(value, "ON", on): 4281 return f"{value} ON {on}" 4282 4283 return None 4284 4285 def _parse_json_object(self) -> exp.JSONObject: 4286 star = self._parse_star() 4287 expressions = ( 4288 [star] 4289 if star 4290 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4291 ) 4292 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4293 4294 unique_keys = None 4295 if self._match_text_seq("WITH", "UNIQUE"): 4296 unique_keys = True 4297 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4298 unique_keys = False 4299 4300 self._match_text_seq("KEYS") 4301 4302 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4303 self._parse_type() 4304 ) 4305 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4306 4307 return self.expression( 4308 exp.JSONObject, 4309 expressions=expressions, 4310 null_handling=null_handling, 4311 unique_keys=unique_keys, 4312 return_type=return_type, 4313 encoding=encoding, 4314 ) 4315 4316 def _parse_logarithm(self) -> exp.Func: 4317 # Default argument order is base, expression 4318 args = self._parse_csv(self._parse_range) 4319 4320 if len(args) > 1: 4321 if not self.LOG_BASE_FIRST: 4322 args.reverse() 4323 return exp.Log.from_arg_list(args) 4324 4325 return self.expression( 4326 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4327 ) 4328 4329 def _parse_match_against(self) -> exp.MatchAgainst: 4330 expressions = self._parse_csv(self._parse_column) 4331 4332 self._match_text_seq(")", "AGAINST", "(") 4333 4334 this = self._parse_string() 4335 4336 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4337 modifier = "IN NATURAL LANGUAGE MODE" 4338 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4339 modifier = f"{modifier} WITH QUERY EXPANSION" 4340 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4341 modifier = "IN BOOLEAN MODE" 4342 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4343 modifier = "WITH QUERY EXPANSION" 4344 else: 4345 modifier = None 4346 4347 return self.expression( 4348 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4349 ) 4350 4351 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4352 def _parse_open_json(self) -> exp.OpenJSON: 4353 this = self._parse_bitwise() 4354 path = self._match(TokenType.COMMA) and self._parse_string() 4355 4356 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4357 this = self._parse_field(any_token=True) 4358 kind = self._parse_types() 4359 path = self._parse_string() 4360 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4361 4362 return self.expression( 4363 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4364 ) 4365 4366 expressions = None 4367 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4368 self._match_l_paren() 4369 expressions = self._parse_csv(_parse_open_json_column_def) 4370 4371 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4372 4373 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4374 args = self._parse_csv(self._parse_bitwise) 4375 4376 if self._match(TokenType.IN): 4377 return self.expression( 4378 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4379 ) 4380 4381 if haystack_first: 4382 haystack = seq_get(args, 0) 4383 needle = seq_get(args, 1) 4384 else: 4385 needle = seq_get(args, 0) 
4386 haystack = seq_get(args, 1) 4387 4388 return self.expression( 4389 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4390 ) 4391 4392 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4393 args = self._parse_csv(self._parse_table) 4394 return exp.JoinHint(this=func_name.upper(), expressions=args) 4395 4396 def _parse_substring(self) -> exp.Substring: 4397 # Postgres supports the form: substring(string [from int] [for int]) 4398 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4399 4400 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4401 4402 if self._match(TokenType.FROM): 4403 args.append(self._parse_bitwise()) 4404 if self._match(TokenType.FOR): 4405 args.append(self._parse_bitwise()) 4406 4407 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4408 4409 def _parse_trim(self) -> exp.Trim: 4410 # https://www.w3resource.com/sql/character-functions/trim.php 4411 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4412 4413 position = None 4414 collation = None 4415 4416 if self._match_texts(self.TRIM_TYPES): 4417 position = self._prev.text.upper() 4418 4419 expression = self._parse_bitwise() 4420 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4421 this = self._parse_bitwise() 4422 else: 4423 this = expression 4424 expression = None 4425 4426 if self._match(TokenType.COLLATE): 4427 collation = self._parse_bitwise() 4428 4429 return self.expression( 4430 exp.Trim, this=this, position=position, expression=expression, collation=collation 4431 ) 4432 4433 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4434 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4435 4436 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4437 return self._parse_window(self._parse_id_var(), alias=True) 4438 4439 def _parse_respect_or_ignore_nulls( 4440 self, this: t.Optional[exp.Expression] 4441 ) -> t.Optional[exp.Expression]: 4442 if self._match_text_seq("IGNORE", "NULLS"): 4443 return self.expression(exp.IgnoreNulls, this=this) 4444 if self._match_text_seq("RESPECT", "NULLS"): 4445 return self.expression(exp.RespectNulls, this=this) 4446 return this 4447 4448 def _parse_window( 4449 self, this: t.Optional[exp.Expression], alias: bool = False 4450 ) -> t.Optional[exp.Expression]: 4451 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4452 self._match(TokenType.WHERE) 4453 this = self.expression( 4454 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4455 ) 4456 self._match_r_paren() 4457 4458 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4459 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4460 if self._match_text_seq("WITHIN", "GROUP"): 4461 order = self._parse_wrapped(self._parse_order) 4462 this = self.expression(exp.WithinGroup, this=this, expression=order) 4463 4464 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4465 # Some dialects choose to implement and some do not. 4466 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4467 4468 # There is some code above in _parse_lambda that handles 4469 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4470 4471 # The below changes handle 4472 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
4473 4474 # Oracle allows both formats 4475 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4476 # and Snowflake chose to do the same for familiarity 4477 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4478 this = self._parse_respect_or_ignore_nulls(this) 4479 4480 # bigquery select from window x AS (partition by ...) 4481 if alias: 4482 over = None 4483 self._match(TokenType.ALIAS) 4484 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4485 return this 4486 else: 4487 over = self._prev.text.upper() 4488 4489 if not self._match(TokenType.L_PAREN): 4490 return self.expression( 4491 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4492 ) 4493 4494 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4495 4496 first = self._match(TokenType.FIRST) 4497 if self._match_text_seq("LAST"): 4498 first = False 4499 4500 partition, order = self._parse_partition_and_order() 4501 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4502 4503 if kind: 4504 self._match(TokenType.BETWEEN) 4505 start = self._parse_window_spec() 4506 self._match(TokenType.AND) 4507 end = self._parse_window_spec() 4508 4509 spec = self.expression( 4510 exp.WindowSpec, 4511 kind=kind, 4512 start=start["value"], 4513 start_side=start["side"], 4514 end=end["value"], 4515 end_side=end["side"], 4516 ) 4517 else: 4518 spec = None 4519 4520 self._match_r_paren() 4521 4522 window = self.expression( 4523 exp.Window, 4524 this=this, 4525 partition_by=partition, 4526 order=order, 4527 spec=spec, 4528 alias=window_alias, 4529 over=over, 4530 first=first, 4531 ) 4532 4533 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4534 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4535 return self._parse_window(window, alias=alias) 4536 4537 return window 4538 4539 def _parse_partition_and_order( 4540 self, 4541 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4542 return self._parse_partition_by(), self._parse_order() 4543 4544 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4545 self._match(TokenType.BETWEEN) 4546 4547 return { 4548 "value": ( 4549 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4550 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4551 or self._parse_bitwise() 4552 ), 4553 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4554 } 4555 4556 def _parse_alias( 4557 self, this: t.Optional[exp.Expression], explicit: bool = False 4558 ) -> t.Optional[exp.Expression]: 4559 any_token = self._match(TokenType.ALIAS) 4560 4561 if explicit and not any_token: 4562 return this 4563 4564 if self._match(TokenType.L_PAREN): 4565 aliases = self.expression( 4566 exp.Aliases, 4567 this=this, 4568 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4569 ) 4570 self._match_r_paren(aliases) 4571 return aliases 4572 4573 alias = self._parse_id_var(any_token) 4574 4575 if alias: 4576 return self.expression(exp.Alias, this=this, alias=alias) 4577 4578 return this 4579 4580 def _parse_id_var( 4581 self, 4582 any_token: bool = True, 4583 tokens: t.Optional[t.Collection[TokenType]] = None, 4584 ) -> t.Optional[exp.Expression]: 4585 identifier = self._parse_identifier() 4586 4587 if identifier: 4588 return identifier 4589 4590 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4591 quoted = self._prev.token_type == TokenType.STRING 4592 return exp.Identifier(this=self._prev.text, quoted=quoted) 4593 4594 return None 4595 4596 def _parse_string(self) -> t.Optional[exp.Expression]: 4597 if self._match(TokenType.STRING): 4598 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4599 return self._parse_placeholder() 4600 4601 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4602 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4603 4604 def _parse_number(self) -> t.Optional[exp.Expression]: 4605 if self._match(TokenType.NUMBER): 4606 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4607 return self._parse_placeholder() 4608 4609 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4610 if self._match(TokenType.IDENTIFIER): 4611 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4612 return self._parse_placeholder() 4613 4614 def _parse_var( 4615 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4616 ) -> t.Optional[exp.Expression]: 4617 if ( 4618 (any_token and self._advance_any()) 4619 or self._match(TokenType.VAR) 4620 or (self._match_set(tokens) if tokens else False) 4621 ): 4622 return self.expression(exp.Var, this=self._prev.text) 4623 return self._parse_placeholder() 4624 4625 def _advance_any(self) -> t.Optional[Token]: 4626 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4627 self._advance() 4628 return self._prev 4629 return None 4630 4631 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4632 return self._parse_var() or self._parse_string() 4633 4634 def _parse_null(self) -> t.Optional[exp.Expression]: 4635 if self._match_set(self.NULL_TOKENS): 4636 return 
self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4637 return self._parse_placeholder() 4638 4639 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4640 if self._match(TokenType.TRUE): 4641 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4642 if self._match(TokenType.FALSE): 4643 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4644 return self._parse_placeholder() 4645 4646 def _parse_star(self) -> t.Optional[exp.Expression]: 4647 if self._match(TokenType.STAR): 4648 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4649 return self._parse_placeholder() 4650 4651 def _parse_parameter(self) -> exp.Parameter: 4652 wrapped = self._match(TokenType.L_BRACE) 4653 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4654 self._match(TokenType.R_BRACE) 4655 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4656 4657 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4658 if self._match_set(self.PLACEHOLDER_PARSERS): 4659 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4660 if placeholder: 4661 return placeholder 4662 self._advance(-1) 4663 return None 4664 4665 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4666 if not self._match(TokenType.EXCEPT): 4667 return None 4668 if self._match(TokenType.L_PAREN, advance=False): 4669 return self._parse_wrapped_csv(self._parse_column) 4670 4671 except_column = self._parse_column() 4672 return [except_column] if except_column else None 4673 4674 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4675 if not self._match(TokenType.REPLACE): 4676 return None 4677 if self._match(TokenType.L_PAREN, advance=False): 4678 return self._parse_wrapped_csv(self._parse_expression) 4679 4680 replace_expression = self._parse_expression() 4681 return [replace_expression] if replace_expression else None 4682 4683 def _parse_csv( 4684 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4685 ) -> t.List[exp.Expression]: 4686 parse_result = parse_method() 4687 items = [parse_result] if parse_result is not None else [] 4688 4689 while self._match(sep): 4690 self._add_comments(parse_result) 4691 parse_result = parse_method() 4692 if parse_result is not None: 4693 items.append(parse_result) 4694 4695 return items 4696 4697 def _parse_tokens( 4698 self, parse_method: t.Callable, expressions: t.Dict 4699 ) -> t.Optional[exp.Expression]: 4700 this = parse_method() 4701 4702 while self._match_set(expressions): 4703 this = self.expression( 4704 expressions[self._prev.token_type], 4705 this=this, 4706 comments=self._prev_comments, 4707 expression=parse_method(), 4708 ) 4709 4710 return this 4711 4712 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4713 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4714 4715 def _parse_wrapped_csv( 4716 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4717 ) -> t.List[exp.Expression]: 4718 return self._parse_wrapped( 4719 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4720 ) 4721 4722 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4723 wrapped = self._match(TokenType.L_PAREN) 4724 if not wrapped and not optional: 4725 self.raise_error("Expecting (") 4726 parse_result = parse_method() 4727 if wrapped: 4728 self._match_r_paren() 4729 return parse_result 4730 4731 def _parse_expressions(self) -> t.List[exp.Expression]: 4732 return 
self._parse_csv(self._parse_expression) 4733 4734 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4735 return self._parse_select() or self._parse_set_operations( 4736 self._parse_expression() if alias else self._parse_conjunction() 4737 ) 4738 4739 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4740 return self._parse_query_modifiers( 4741 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4742 ) 4743 4744 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4745 this = None 4746 if self._match_texts(self.TRANSACTION_KIND): 4747 this = self._prev.text 4748 4749 self._match_texts({"TRANSACTION", "WORK"}) 4750 4751 modes = [] 4752 while True: 4753 mode = [] 4754 while self._match(TokenType.VAR): 4755 mode.append(self._prev.text) 4756 4757 if mode: 4758 modes.append(" ".join(mode)) 4759 if not self._match(TokenType.COMMA): 4760 break 4761 4762 return self.expression(exp.Transaction, this=this, modes=modes) 4763 4764 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4765 chain = None 4766 savepoint = None 4767 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4768 4769 self._match_texts({"TRANSACTION", "WORK"}) 4770 4771 if self._match_text_seq("TO"): 4772 self._match_text_seq("SAVEPOINT") 4773 savepoint = self._parse_id_var() 4774 4775 if self._match(TokenType.AND): 4776 chain = not self._match_text_seq("NO") 4777 self._match_text_seq("CHAIN") 4778 4779 if is_rollback: 4780 return self.expression(exp.Rollback, savepoint=savepoint) 4781 4782 return self.expression(exp.Commit, chain=chain) 4783 4784 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4785 if not self._match_text_seq("ADD"): 4786 return None 4787 4788 self._match(TokenType.COLUMN) 4789 exists_column = self._parse_exists(not_=True) 4790 expression = self._parse_field_def() 4791 4792 if expression: 4793 expression.set("exists", exists_column) 4794 4795 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4796 if self._match_texts(("FIRST", "AFTER")): 4797 position = self._prev.text 4798 column_position = self.expression( 4799 exp.ColumnPosition, this=self._parse_column(), position=position 4800 ) 4801 expression.set("position", column_position) 4802 4803 return expression 4804 4805 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4806 drop = self._match(TokenType.DROP) and self._parse_drop() 4807 if drop and not isinstance(drop, exp.Command): 4808 drop.set("kind", drop.args.get("kind", "COLUMN")) 4809 return drop 4810 4811 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4812 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4813 return self.expression( 4814 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4815 ) 4816 4817 def _parse_add_constraint(self) -> exp.AddConstraint: 4818 this = None 4819 kind = self._prev.token_type 4820 4821 if kind == TokenType.CONSTRAINT: 4822 this = self._parse_id_var() 4823 4824 if self._match_text_seq("CHECK"): 4825 expression = self._parse_wrapped(self._parse_conjunction) 4826 enforced = self._match_text_seq("ENFORCED") 4827 4828 return self.expression( 4829 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4830 ) 4831 4832 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4833 expression = self._parse_foreign_key() 4834 elif kind == TokenType.PRIMARY_KEY or 
self._match(TokenType.PRIMARY_KEY): 4835 expression = self._parse_primary_key() 4836 else: 4837 expression = None 4838 4839 return self.expression(exp.AddConstraint, this=this, expression=expression) 4840 4841 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4842 index = self._index - 1 4843 4844 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4845 return self._parse_csv(self._parse_add_constraint) 4846 4847 self._retreat(index) 4848 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4849 return self._parse_csv(self._parse_field_def) 4850 4851 return self._parse_csv(self._parse_add_column) 4852 4853 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4854 self._match(TokenType.COLUMN) 4855 column = self._parse_field(any_token=True) 4856 4857 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4858 return self.expression(exp.AlterColumn, this=column, drop=True) 4859 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4860 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4861 4862 self._match_text_seq("SET", "DATA") 4863 return self.expression( 4864 exp.AlterColumn, 4865 this=column, 4866 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4867 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4868 using=self._match(TokenType.USING) and self._parse_conjunction(), 4869 ) 4870 4871 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4872 index = self._index - 1 4873 4874 partition_exists = self._parse_exists() 4875 if self._match(TokenType.PARTITION, advance=False): 4876 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4877 4878 self._retreat(index) 4879 return self._parse_csv(self._parse_drop_column) 4880 4881 def _parse_alter_table_rename(self) -> exp.RenameTable: 4882 self._match_text_seq("TO") 4883 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4884 4885 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4886 start = self._prev 4887 4888 if not self._match(TokenType.TABLE): 4889 return self._parse_as_command(start) 4890 4891 exists = self._parse_exists() 4892 only = self._match_text_seq("ONLY") 4893 this = self._parse_table(schema=True) 4894 4895 if self._next: 4896 self._advance() 4897 4898 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4899 if parser: 4900 actions = ensure_list(parser(self)) 4901 4902 if not self._curr: 4903 return self.expression( 4904 exp.AlterTable, 4905 this=this, 4906 exists=exists, 4907 actions=actions, 4908 only=only, 4909 ) 4910 4911 return self._parse_as_command(start) 4912 4913 def _parse_merge(self) -> exp.Merge: 4914 self._match(TokenType.INTO) 4915 target = self._parse_table() 4916 4917 if target and self._match(TokenType.ALIAS, advance=False): 4918 target.set("alias", self._parse_table_alias()) 4919 4920 self._match(TokenType.USING) 4921 using = self._parse_table() 4922 4923 self._match(TokenType.ON) 4924 on = self._parse_conjunction() 4925 4926 whens = [] 4927 while self._match(TokenType.WHEN): 4928 matched = not self._match(TokenType.NOT) 4929 self._match_text_seq("MATCHED") 4930 source = ( 4931 False 4932 if self._match_text_seq("BY", "TARGET") 4933 else self._match_text_seq("BY", "SOURCE") 4934 ) 4935 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4936 4937 self._match(TokenType.THEN) 4938 4939 if self._match(TokenType.INSERT): 4940 _this = self._parse_star() 4941 if _this: 4942 then: 
t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4943 else: 4944 then = self.expression( 4945 exp.Insert, 4946 this=self._parse_value(), 4947 expression=self._match(TokenType.VALUES) and self._parse_value(), 4948 ) 4949 elif self._match(TokenType.UPDATE): 4950 expressions = self._parse_star() 4951 if expressions: 4952 then = self.expression(exp.Update, expressions=expressions) 4953 else: 4954 then = self.expression( 4955 exp.Update, 4956 expressions=self._match(TokenType.SET) 4957 and self._parse_csv(self._parse_equality), 4958 ) 4959 elif self._match(TokenType.DELETE): 4960 then = self.expression(exp.Var, this=self._prev.text) 4961 else: 4962 then = None 4963 4964 whens.append( 4965 self.expression( 4966 exp.When, 4967 matched=matched, 4968 source=source, 4969 condition=condition, 4970 then=then, 4971 ) 4972 ) 4973 4974 return self.expression( 4975 exp.Merge, 4976 this=target, 4977 using=using, 4978 on=on, 4979 expressions=whens, 4980 ) 4981 4982 def _parse_show(self) -> t.Optional[exp.Expression]: 4983 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4984 if parser: 4985 return parser(self) 4986 return self._parse_as_command(self._prev) 4987 4988 def _parse_set_item_assignment( 4989 self, kind: t.Optional[str] = None 4990 ) -> t.Optional[exp.Expression]: 4991 index = self._index 4992 4993 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4994 return self._parse_set_transaction(global_=kind == "GLOBAL") 4995 4996 left = self._parse_primary() or self._parse_id_var() 4997 assignment_delimiter = self._match_texts(("=", "TO")) 4998 4999 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5000 self._retreat(index) 5001 return None 5002 5003 right = self._parse_statement() or self._parse_id_var() 5004 this = self.expression(exp.EQ, this=left, expression=right) 5005 5006 return self.expression(exp.SetItem, this=this, kind=kind) 5007 5008 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5009 self._match_text_seq("TRANSACTION") 5010 characteristics = self._parse_csv( 5011 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5012 ) 5013 return self.expression( 5014 exp.SetItem, 5015 expressions=characteristics, 5016 kind="TRANSACTION", 5017 **{"global": global_}, # type: ignore 5018 ) 5019 5020 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5021 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5022 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5023 5024 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5025 index = self._index 5026 set_ = self.expression( 5027 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5028 ) 5029 5030 if self._curr: 5031 self._retreat(index) 5032 return self._parse_as_command(self._prev) 5033 5034 return set_ 5035 5036 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5037 for option in options: 5038 if self._match_text_seq(*option.split(" ")): 5039 return exp.var(option) 5040 return None 5041 5042 def _parse_as_command(self, start: Token) -> exp.Command: 5043 while self._curr: 5044 self._advance() 5045 text = self._find_sql(start, self._prev) 5046 size = len(start.text) 5047 return exp.Command(this=text[:size], expression=text[size:]) 5048 5049 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5050 settings = [] 5051 5052 self._match_l_paren() 5053 kind = 
self._parse_id_var() 5054 5055 if self._match(TokenType.L_PAREN): 5056 while True: 5057 key = self._parse_id_var() 5058 value = self._parse_primary() 5059 5060 if not key and value is None: 5061 break 5062 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5063 self._match(TokenType.R_PAREN) 5064 5065 self._match_r_paren() 5066 5067 return self.expression( 5068 exp.DictProperty, 5069 this=this, 5070 kind=kind.this if kind else None, 5071 settings=settings, 5072 ) 5073 5074 def _parse_dict_range(self, this: str) -> exp.DictRange: 5075 self._match_l_paren() 5076 has_min = self._match_text_seq("MIN") 5077 if has_min: 5078 min = self._parse_var() or self._parse_primary() 5079 self._match_text_seq("MAX") 5080 max = self._parse_var() or self._parse_primary() 5081 else: 5082 max = self._parse_var() or self._parse_primary() 5083 min = exp.Literal.number(0) 5084 self._match_r_paren() 5085 return self.expression(exp.DictRange, this=this, min=min, max=max) 5086 5087 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5088 index = self._index 5089 expression = self._parse_column() 5090 if not self._match(TokenType.IN): 5091 self._retreat(index - 1) 5092 return None 5093 iterator = self._parse_column() 5094 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5095 return self.expression( 5096 exp.Comprehension, 5097 this=this, 5098 expression=expression, 5099 iterator=iterator, 5100 condition=condition, 5101 ) 5102 5103 def _find_parser( 5104 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5105 ) -> t.Optional[t.Callable]: 5106 if not self._curr: 5107 return None 5108 5109 index = self._index 5110 this = [] 5111 while True: 5112 # The current token might be multiple words 5113 curr = self._curr.text.upper() 5114 key = curr.split(" ") 5115 this.append(curr) 5116 5117 self._advance() 5118 result, trie = in_trie(trie, key) 5119 if result == TrieResult.FAILED: 5120 break 5121 5122 if result == TrieResult.EXISTS: 5123 subparser = parsers[" ".join(this)] 5124 return subparser 5125 5126 self._retreat(index) 5127 return None 5128 5129 def _match(self, token_type, advance=True, expression=None): 5130 if not self._curr: 5131 return None 5132 5133 if self._curr.token_type == token_type: 5134 if advance: 5135 self._advance() 5136 self._add_comments(expression) 5137 return True 5138 5139 return None 5140 5141 def _match_set(self, types, advance=True): 5142 if not self._curr: 5143 return None 5144 5145 if self._curr.token_type in types: 5146 if advance: 5147 self._advance() 5148 return True 5149 5150 return None 5151 5152 def _match_pair(self, token_type_a, token_type_b, advance=True): 5153 if not self._curr or not self._next: 5154 return None 5155 5156 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5157 if advance: 5158 self._advance(2) 5159 return True 5160 5161 return None 5162 5163 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5164 if not self._match(TokenType.L_PAREN, expression=expression): 5165 self.raise_error("Expecting (") 5166 5167 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5168 if not self._match(TokenType.R_PAREN, expression=expression): 5169 self.raise_error("Expecting )") 5170 5171 def _match_texts(self, texts, advance=True): 5172 if self._curr and self._curr.text.upper() in texts: 5173 if advance: 5174 self._advance() 5175 return True 5176 return False 5177 5178 def _match_text_seq(self, *texts, 
advance=True): 5179 index = self._index 5180 for text in texts: 5181 if self._curr and self._curr.text.upper() == text: 5182 self._advance() 5183 else: 5184 self._retreat(index) 5185 return False 5186 5187 if not advance: 5188 self._retreat(index) 5189 5190 return True 5191 5192 @t.overload 5193 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5194 ... 5195 5196 @t.overload 5197 def _replace_columns_with_dots( 5198 self, this: t.Optional[exp.Expression] 5199 ) -> t.Optional[exp.Expression]: 5200 ... 5201 5202 def _replace_columns_with_dots(self, this): 5203 if isinstance(this, exp.Dot): 5204 exp.replace_children(this, self._replace_columns_with_dots) 5205 elif isinstance(this, exp.Column): 5206 exp.replace_children(this, self._replace_columns_with_dots) 5207 table = this.args.get("table") 5208 this = ( 5209 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5210 ) 5211 5212 return this 5213 5214 def _replace_lambda( 5215 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5216 ) -> t.Optional[exp.Expression]: 5217 if not node: 5218 return node 5219 5220 for column in node.find_all(exp.Column): 5221 if column.parts[0].name in lambda_variables: 5222 dot_or_id = column.to_dot() if column.table else column.this 5223 parent = column.parent 5224 5225 while isinstance(parent, exp.Dot): 5226 if not isinstance(parent.parent, exp.Dot): 5227 parent.replace(dot_or_id) 5228 break 5229 parent = parent.parent 5230 else: 5231 if column is node: 5232 node = dot_or_id 5233 else: 5234 column.replace(dot_or_id) 5235 return node 5236 5237 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5238 return [ 5239 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5240 for value in values 5241 if value 5242 ]
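The _parse_decode docstring above notes that the searched, multi-argument DECODE variant is always parsed into a CASE expression, with NULL searches turned into explicit IS NULL checks. A minimal end-to-end sketch of that rewrite, assuming DECODE is registered in FUNCTION_PARSERS for the dialect in use (the exact CASE rendering may vary by version):

    import sqlglot

    # The searched DECODE form is rewritten into CASE at parse time; the NULL
    # search becomes an IS NULL check instead of an equality comparison.
    tree = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', NULL, 'none', 'other') FROM t")
    print(tree.sql())
    # Roughly: SELECT CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END FROM t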
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
927 def __init__( 928 self, 929 error_level: t.Optional[ErrorLevel] = None, 930 error_message_context: int = 100, 931 max_errors: int = 3, 932 ): 933 self.error_level = error_level or ErrorLevel.IMMEDIATE 934 self.error_message_context = error_message_context 935 self.max_errors = max_errors 936 self._tokenizer = self.TOKENIZER_CLASS() 937 self.reset()
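For orientation, a small usage sketch of constructing a Parser directly with a non-default error level; most callers reach the parser indirectly through the top-level sqlglot.parse / sqlglot.parse_one helpers instead:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Collect errors while parsing and raise up to 5 of them together at the end.
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)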
949 def parse( 950 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 951 ) -> t.List[t.Optional[exp.Expression]]: 952 """ 953 Parses a list of tokens and returns a list of syntax trees, one tree 954 per parsed SQL statement. 955 956 Args: 957 raw_tokens: The list of tokens. 958 sql: The original SQL string, used to produce helpful debug messages. 959 960 Returns: 961 The list of the produced syntax trees. 962 """ 963 return self._parse( 964 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 965 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
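A short sketch of driving parse by hand with the base Tokenizer; dialect subclasses pair their own Tokenizer and Parser classes in the same way:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    # One syntax tree is produced per SQL statement in the token stream.
    trees = Parser().parse(Tokenizer().tokenize(sql), sql)
    print([tree.sql() for tree in trees])  # ['SELECT 1', 'SELECT 2']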
967 def parse_into( 968 self, 969 expression_types: exp.IntoType, 970 raw_tokens: t.List[Token], 971 sql: t.Optional[str] = None, 972 ) -> t.List[t.Optional[exp.Expression]]: 973 """ 974 Parses a list of tokens into a given Expression type. If a collection of Expression 975 types is given instead, this method will try to parse the token list into each one 976 of them, stopping at the first for which the parsing succeeds. 977 978 Args: 979 expression_types: The expression type(s) to try and parse the token list into. 980 raw_tokens: The list of tokens. 981 sql: The original SQL string, used to produce helpful debug messages. 982 983 Returns: 984 The target Expression. 985 """ 986 errors = [] 987 for expression_type in ensure_list(expression_types): 988 parser = self.EXPRESSION_PARSERS.get(expression_type) 989 if not parser: 990 raise TypeError(f"No parser registered for {expression_type}") 991 992 try: 993 return self._parse(parser, raw_tokens, sql) 994 except ParseError as e: 995 e.errors[0]["into_expression"] = expression_type 996 errors.append(e) 997 998 raise ParseError( 999 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1000 errors=merge_errors(errors), 1001 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
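A sketch of parse_into, assuming exp.Condition is among the expression types registered in EXPRESSION_PARSERS (so the tokens are parsed as a bare condition rather than as a full statement):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "x = 1 AND y = 2"
    # parse_into returns a list, like parse; take the single parsed condition.
    condition = Parser().parse_into(exp.Condition, Tokenizer().tokenize(sql), sql)[0]
    assert isinstance(condition, exp.And)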
1038 def check_errors(self) -> None: 1039 """Logs or raises any found errors, depending on the chosen error level setting.""" 1040 if self.error_level == ErrorLevel.WARN: 1041 for error in self.errors: 1042 logger.error(str(error)) 1043 elif self.error_level == ErrorLevel.RAISE and self.errors: 1044 raise ParseError( 1045 concat_messages(self.errors, self.max_errors), 1046 errors=merge_errors(self.errors), 1047 )
Logs or raises any found errors, depending on the chosen error level setting.
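check_errors is what makes ErrorLevel.WARN and ErrorLevel.RAISE behave differently. A sketch of the WARN path, assuming the stray trailing token below triggers the parser's leftover-token error: the error is logged rather than raised, and the partially parsed trees are still returned:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1 )"  # deliberately malformed: unmatched trailing paren
    parser = Parser(error_level=ErrorLevel.WARN)
    # check_errors logs the recorded error via the module logger; no exception.
    trees = parser.parse(Tokenizer().tokenize(sql), sql)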
1049 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1050 """ 1051 Appends an error in the list of recorded errors or raises it, depending on the chosen 1052 error level setting. 1053 """ 1054 token = token or self._curr or self._prev or Token.string("") 1055 start = token.start 1056 end = token.end + 1 1057 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1058 highlight = self.sql[start:end] 1059 end_context = self.sql[end : end + self.error_message_context] 1060 1061 error = ParseError.new( 1062 f"{message}. Line {token.line}, Col: {token.col}.\n" 1063 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1064 description=message, 1065 line=token.line, 1066 col=token.col, 1067 start_context=start_context, 1068 highlight=highlight, 1069 end_context=end_context, 1070 ) 1071 1072 if self.error_level == ErrorLevel.IMMEDIATE: 1073 raise error 1074 1075 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
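The errors recorded by raise_error carry the captured context shown above (line, column, highlight, and surrounding text). A sketch of inspecting them from a caught ParseError, assuming the missing table name below produces a parse error:

    import sqlglot
    from sqlglot.errors import ParseError

    try:
        sqlglot.parse_one("SELECT * FROM")  # FROM with no table name
    except ParseError as e:
        err = e.errors[0]
        print(err["description"], err["line"], err["col"], err["highlight"])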
1077 def expression( 1078 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1079 ) -> E: 1080 """ 1081 Creates a new, validated Expression. 1082 1083 Args: 1084 exp_class: The expression class to instantiate. 1085 comments: An optional list of comments to attach to the expression. 1086 kwargs: The arguments to set for the expression along with their respective values. 1087 1088 Returns: 1089 The target expression. 1090 """ 1091 instance = exp_class(**kwargs) 1092 instance.add_comments(comments) if comments else self._add_comments(instance) 1093 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
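This is the constructor used by every self.expression(...) call in the methods above: it builds the node, attaches any pending comments, and validates it. A sketch of calling it directly on a fresh parser:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    # Instantiates exp.Not, attaches pending comments (none here), validates it.
    node = parser.expression(exp.Not, this=exp.column("flag"))
    print(node.sql())  # NOT flag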
1100 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1101 """ 1102 Validates an Expression, making sure that all its mandatory arguments are set. 1103 1104 Args: 1105 expression: The expression to validate. 1106 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1107 1108 Returns: 1109 The validated expression. 1110 """ 1111 if self.error_level != ErrorLevel.IGNORE: 1112 for error_message in expression.error_messages(args): 1113 self.raise_error(error_message) 1114 1115 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
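A sketch of the validation failure path, assuming exp.Abs declares this as a mandatory argument: validating an empty instance routes each error message through raise_error, which raises immediately at the default error level:

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    try:
        Parser().validate_expression(exp.Abs())  # mandatory `this` is missing
    except ParseError as e:
        print(e.errors[0]["description"])  # e.g. a "Required keyword: 'this' missing" message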