sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 
TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.MEDIUMINT, 140 TokenType.UMEDIUMINT, 141 TokenType.FIXEDSTRING, 142 TokenType.FLOAT, 143 TokenType.DOUBLE, 144 TokenType.CHAR, 145 TokenType.NCHAR, 146 TokenType.VARCHAR, 147 TokenType.NVARCHAR, 148 TokenType.TEXT, 149 TokenType.MEDIUMTEXT, 150 TokenType.LONGTEXT, 151 TokenType.MEDIUMBLOB, 152 TokenType.LONGBLOB, 153 TokenType.BINARY, 154 TokenType.VARBINARY, 155 TokenType.JSON, 156 TokenType.JSONB, 157 TokenType.INTERVAL, 158 TokenType.TINYBLOB, 159 TokenType.TINYTEXT, 160 TokenType.TIME, 161 TokenType.TIMETZ, 162 TokenType.TIMESTAMP, 163 TokenType.TIMESTAMPTZ, 164 TokenType.TIMESTAMPLTZ, 165 TokenType.DATETIME, 166 TokenType.DATETIME64, 167 TokenType.DATE, 168 TokenType.INT4RANGE, 169 TokenType.INT4MULTIRANGE, 170 TokenType.INT8RANGE, 171 TokenType.INT8MULTIRANGE, 172 TokenType.NUMRANGE, 173 TokenType.NUMMULTIRANGE, 174 TokenType.TSRANGE, 175 TokenType.TSMULTIRANGE, 176 TokenType.TSTZRANGE, 177 TokenType.TSTZMULTIRANGE, 178 TokenType.DATERANGE, 179 TokenType.DATEMULTIRANGE, 180 TokenType.DECIMAL, 181 TokenType.UDECIMAL, 182 TokenType.BIGDECIMAL, 183 TokenType.UUID, 184 TokenType.GEOGRAPHY, 185 TokenType.GEOMETRY, 186 TokenType.HLLSKETCH, 187 TokenType.HSTORE, 188 TokenType.PSEUDO_TYPE, 189 TokenType.SUPER, 190 TokenType.SERIAL, 191 TokenType.SMALLSERIAL, 192 TokenType.BIGSERIAL, 193 TokenType.XML, 194 TokenType.YEAR, 195 TokenType.UNIQUEIDENTIFIER, 196 TokenType.USERDEFINED, 197 TokenType.MONEY, 198 TokenType.SMALLMONEY, 199 TokenType.ROWVERSION, 200 TokenType.IMAGE, 201 TokenType.VARIANT, 202 TokenType.OBJECT, 203 TokenType.OBJECT_IDENTIFIER, 204 TokenType.INET, 205 TokenType.IPADDRESS, 206 TokenType.IPPREFIX, 207 TokenType.UNKNOWN, 208 TokenType.NULL, 209 *ENUM_TYPE_TOKENS, 210 *NESTED_TYPE_TOKENS, 211 } 212 213 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 214 TokenType.BIGINT: TokenType.UBIGINT, 215 TokenType.INT: TokenType.UINT, 216 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 217 
TokenType.SMALLINT: TokenType.USMALLINT, 218 TokenType.TINYINT: TokenType.UTINYINT, 219 TokenType.DECIMAL: TokenType.UDECIMAL, 220 } 221 222 SUBQUERY_PREDICATES = { 223 TokenType.ANY: exp.Any, 224 TokenType.ALL: exp.All, 225 TokenType.EXISTS: exp.Exists, 226 TokenType.SOME: exp.Any, 227 } 228 229 RESERVED_KEYWORDS = { 230 *Tokenizer.SINGLE_TOKENS.values(), 231 TokenType.SELECT, 232 } 233 234 DB_CREATABLES = { 235 TokenType.DATABASE, 236 TokenType.SCHEMA, 237 TokenType.TABLE, 238 TokenType.VIEW, 239 TokenType.DICTIONARY, 240 } 241 242 CREATABLES = { 243 TokenType.COLUMN, 244 TokenType.FUNCTION, 245 TokenType.INDEX, 246 TokenType.PROCEDURE, 247 *DB_CREATABLES, 248 } 249 250 # Tokens that can represent identifiers 251 ID_VAR_TOKENS = { 252 TokenType.VAR, 253 TokenType.ANTI, 254 TokenType.APPLY, 255 TokenType.ASC, 256 TokenType.AUTO_INCREMENT, 257 TokenType.BEGIN, 258 TokenType.CACHE, 259 TokenType.CASE, 260 TokenType.COLLATE, 261 TokenType.COMMAND, 262 TokenType.COMMENT, 263 TokenType.COMMIT, 264 TokenType.CONSTRAINT, 265 TokenType.DEFAULT, 266 TokenType.DELETE, 267 TokenType.DESC, 268 TokenType.DESCRIBE, 269 TokenType.DICTIONARY, 270 TokenType.DIV, 271 TokenType.END, 272 TokenType.EXECUTE, 273 TokenType.ESCAPE, 274 TokenType.FALSE, 275 TokenType.FIRST, 276 TokenType.FILTER, 277 TokenType.FORMAT, 278 TokenType.FULL, 279 TokenType.IS, 280 TokenType.ISNULL, 281 TokenType.INTERVAL, 282 TokenType.KEEP, 283 TokenType.KILL, 284 TokenType.LEFT, 285 TokenType.LOAD, 286 TokenType.MERGE, 287 TokenType.NATURAL, 288 TokenType.NEXT, 289 TokenType.OFFSET, 290 TokenType.ORDINALITY, 291 TokenType.OVERLAPS, 292 TokenType.OVERWRITE, 293 TokenType.PARTITION, 294 TokenType.PERCENT, 295 TokenType.PIVOT, 296 TokenType.PRAGMA, 297 TokenType.RANGE, 298 TokenType.REFERENCES, 299 TokenType.RIGHT, 300 TokenType.ROW, 301 TokenType.ROWS, 302 TokenType.SEMI, 303 TokenType.SET, 304 TokenType.SETTINGS, 305 TokenType.SHOW, 306 TokenType.TEMPORARY, 307 TokenType.TOP, 308 TokenType.TRUE, 309 
TokenType.UNIQUE, 310 TokenType.UNPIVOT, 311 TokenType.UPDATE, 312 TokenType.VOLATILE, 313 TokenType.WINDOW, 314 *CREATABLES, 315 *SUBQUERY_PREDICATES, 316 *TYPE_TOKENS, 317 *NO_PAREN_FUNCTIONS, 318 } 319 320 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 321 322 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 323 TokenType.ANTI, 324 TokenType.APPLY, 325 TokenType.ASOF, 326 TokenType.FULL, 327 TokenType.LEFT, 328 TokenType.LOCK, 329 TokenType.NATURAL, 330 TokenType.OFFSET, 331 TokenType.RIGHT, 332 TokenType.SEMI, 333 TokenType.WINDOW, 334 } 335 336 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 337 338 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 339 340 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 341 342 FUNC_TOKENS = { 343 TokenType.COLLATE, 344 TokenType.COMMAND, 345 TokenType.CURRENT_DATE, 346 TokenType.CURRENT_DATETIME, 347 TokenType.CURRENT_TIMESTAMP, 348 TokenType.CURRENT_TIME, 349 TokenType.CURRENT_USER, 350 TokenType.FILTER, 351 TokenType.FIRST, 352 TokenType.FORMAT, 353 TokenType.GLOB, 354 TokenType.IDENTIFIER, 355 TokenType.INDEX, 356 TokenType.ISNULL, 357 TokenType.ILIKE, 358 TokenType.INSERT, 359 TokenType.LIKE, 360 TokenType.MERGE, 361 TokenType.OFFSET, 362 TokenType.PRIMARY_KEY, 363 TokenType.RANGE, 364 TokenType.REPLACE, 365 TokenType.RLIKE, 366 TokenType.ROW, 367 TokenType.UNNEST, 368 TokenType.VAR, 369 TokenType.LEFT, 370 TokenType.RIGHT, 371 TokenType.DATE, 372 TokenType.DATETIME, 373 TokenType.TABLE, 374 TokenType.TIMESTAMP, 375 TokenType.TIMESTAMPTZ, 376 TokenType.WINDOW, 377 TokenType.XOR, 378 *TYPE_TOKENS, 379 *SUBQUERY_PREDICATES, 380 } 381 382 CONJUNCTION = { 383 TokenType.AND: exp.And, 384 TokenType.OR: exp.Or, 385 } 386 387 EQUALITY = { 388 TokenType.EQ: exp.EQ, 389 TokenType.NEQ: exp.NEQ, 390 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 391 } 392 393 COMPARISON = { 394 TokenType.GT: exp.GT, 395 TokenType.GTE: exp.GTE, 396 TokenType.LT: exp.LT, 397 TokenType.LTE: exp.LTE, 398 } 399 400 BITWISE = { 401 TokenType.AMP: 
exp.BitwiseAnd, 402 TokenType.CARET: exp.BitwiseXor, 403 TokenType.PIPE: exp.BitwiseOr, 404 TokenType.DPIPE: exp.DPipe, 405 } 406 407 TERM = { 408 TokenType.DASH: exp.Sub, 409 TokenType.PLUS: exp.Add, 410 TokenType.MOD: exp.Mod, 411 TokenType.COLLATE: exp.Collate, 412 } 413 414 FACTOR = { 415 TokenType.DIV: exp.IntDiv, 416 TokenType.LR_ARROW: exp.Distance, 417 TokenType.SLASH: exp.Div, 418 TokenType.STAR: exp.Mul, 419 } 420 421 TIMES = { 422 TokenType.TIME, 423 TokenType.TIMETZ, 424 } 425 426 TIMESTAMPS = { 427 TokenType.TIMESTAMP, 428 TokenType.TIMESTAMPTZ, 429 TokenType.TIMESTAMPLTZ, 430 *TIMES, 431 } 432 433 SET_OPERATIONS = { 434 TokenType.UNION, 435 TokenType.INTERSECT, 436 TokenType.EXCEPT, 437 } 438 439 JOIN_METHODS = { 440 TokenType.NATURAL, 441 TokenType.ASOF, 442 } 443 444 JOIN_SIDES = { 445 TokenType.LEFT, 446 TokenType.RIGHT, 447 TokenType.FULL, 448 } 449 450 JOIN_KINDS = { 451 TokenType.INNER, 452 TokenType.OUTER, 453 TokenType.CROSS, 454 TokenType.SEMI, 455 TokenType.ANTI, 456 } 457 458 JOIN_HINTS: t.Set[str] = set() 459 460 LAMBDAS = { 461 TokenType.ARROW: lambda self, expressions: self.expression( 462 exp.Lambda, 463 this=self._replace_lambda( 464 self._parse_conjunction(), 465 {node.name for node in expressions}, 466 ), 467 expressions=expressions, 468 ), 469 TokenType.FARROW: lambda self, expressions: self.expression( 470 exp.Kwarg, 471 this=exp.var(expressions[0].name), 472 expression=self._parse_conjunction(), 473 ), 474 } 475 476 COLUMN_OPERATORS = { 477 TokenType.DOT: None, 478 TokenType.DCOLON: lambda self, this, to: self.expression( 479 exp.Cast if self.STRICT_CAST else exp.TryCast, 480 this=this, 481 to=to, 482 ), 483 TokenType.ARROW: lambda self, this, path: self.expression( 484 exp.JSONExtract, 485 this=this, 486 expression=path, 487 ), 488 TokenType.DARROW: lambda self, this, path: self.expression( 489 exp.JSONExtractScalar, 490 this=this, 491 expression=path, 492 ), 493 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 494 
exp.JSONBExtract, 495 this=this, 496 expression=path, 497 ), 498 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 499 exp.JSONBExtractScalar, 500 this=this, 501 expression=path, 502 ), 503 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 504 exp.JSONBContains, 505 this=this, 506 expression=key, 507 ), 508 } 509 510 EXPRESSION_PARSERS = { 511 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 512 exp.Column: lambda self: self._parse_column(), 513 exp.Condition: lambda self: self._parse_conjunction(), 514 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 515 exp.Expression: lambda self: self._parse_statement(), 516 exp.From: lambda self: self._parse_from(), 517 exp.Group: lambda self: self._parse_group(), 518 exp.Having: lambda self: self._parse_having(), 519 exp.Identifier: lambda self: self._parse_id_var(), 520 exp.Join: lambda self: self._parse_join(), 521 exp.Lambda: lambda self: self._parse_lambda(), 522 exp.Lateral: lambda self: self._parse_lateral(), 523 exp.Limit: lambda self: self._parse_limit(), 524 exp.Offset: lambda self: self._parse_offset(), 525 exp.Order: lambda self: self._parse_order(), 526 exp.Ordered: lambda self: self._parse_ordered(), 527 exp.Properties: lambda self: self._parse_properties(), 528 exp.Qualify: lambda self: self._parse_qualify(), 529 exp.Returning: lambda self: self._parse_returning(), 530 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 531 exp.Table: lambda self: self._parse_table_parts(), 532 exp.TableAlias: lambda self: self._parse_table_alias(), 533 exp.Where: lambda self: self._parse_where(), 534 exp.Window: lambda self: self._parse_named_window(), 535 exp.With: lambda self: self._parse_with(), 536 "JOIN_TYPE": lambda self: self._parse_join_parts(), 537 } 538 539 STATEMENT_PARSERS = { 540 TokenType.ALTER: lambda self: self._parse_alter(), 541 TokenType.BEGIN: lambda self: self._parse_transaction(), 542 TokenType.CACHE: lambda 
self: self._parse_cache(), 543 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 544 TokenType.COMMENT: lambda self: self._parse_comment(), 545 TokenType.CREATE: lambda self: self._parse_create(), 546 TokenType.DELETE: lambda self: self._parse_delete(), 547 TokenType.DESC: lambda self: self._parse_describe(), 548 TokenType.DESCRIBE: lambda self: self._parse_describe(), 549 TokenType.DROP: lambda self: self._parse_drop(), 550 TokenType.INSERT: lambda self: self._parse_insert(), 551 TokenType.KILL: lambda self: self._parse_kill(), 552 TokenType.LOAD: lambda self: self._parse_load(), 553 TokenType.MERGE: lambda self: self._parse_merge(), 554 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 555 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 556 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 557 TokenType.SET: lambda self: self._parse_set(), 558 TokenType.UNCACHE: lambda self: self._parse_uncache(), 559 TokenType.UPDATE: lambda self: self._parse_update(), 560 TokenType.USE: lambda self: self.expression( 561 exp.Use, 562 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 563 and exp.var(self._prev.text), 564 this=self._parse_table(schema=False), 565 ), 566 } 567 568 UNARY_PARSERS = { 569 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 570 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 571 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 572 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 573 } 574 575 PRIMARY_PARSERS = { 576 TokenType.STRING: lambda self, token: self.expression( 577 exp.Literal, this=token.text, is_string=True 578 ), 579 TokenType.NUMBER: lambda self, token: self.expression( 580 exp.Literal, this=token.text, is_string=False 581 ), 582 TokenType.STAR: lambda self, _: self.expression( 583 exp.Star, 
**{"except": self._parse_except(), "replace": self._parse_replace()} 584 ), 585 TokenType.NULL: lambda self, _: self.expression(exp.Null), 586 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 587 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 588 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 589 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 590 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 591 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 592 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 593 exp.National, this=token.text 594 ), 595 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 596 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 597 exp.RawString, this=token.text 598 ), 599 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 600 } 601 602 PLACEHOLDER_PARSERS = { 603 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 604 TokenType.PARAMETER: lambda self: self._parse_parameter(), 605 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 606 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 607 else None, 608 } 609 610 RANGE_PARSERS = { 611 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 612 TokenType.GLOB: binary_range_parser(exp.Glob), 613 TokenType.ILIKE: binary_range_parser(exp.ILike), 614 TokenType.IN: lambda self, this: self._parse_in(this), 615 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 616 TokenType.IS: lambda self, this: self._parse_is(this), 617 TokenType.LIKE: binary_range_parser(exp.Like), 618 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 619 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 620 TokenType.SIMILAR_TO: 
binary_range_parser(exp.SimilarTo), 621 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 622 } 623 624 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 625 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 626 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 627 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 628 "CHARACTER SET": lambda self: self._parse_character_set(), 629 "CHECKSUM": lambda self: self._parse_checksum(), 630 "CLUSTER BY": lambda self: self._parse_cluster(), 631 "CLUSTERED": lambda self: self._parse_clustered_by(), 632 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 633 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 634 "COPY": lambda self: self._parse_copy_property(), 635 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 636 "DEFINER": lambda self: self._parse_definer(), 637 "DETERMINISTIC": lambda self: self.expression( 638 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 639 ), 640 "DISTKEY": lambda self: self._parse_distkey(), 641 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 642 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 643 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 644 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 645 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 646 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 647 "FREESPACE": lambda self: self._parse_freespace(), 648 "HEAP": lambda self: self.expression(exp.HeapProperty), 649 "IMMUTABLE": lambda self: self.expression( 650 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 651 ), 652 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 653 "LANGUAGE": lambda self: 
self._parse_property_assignment(exp.LanguageProperty), 654 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 655 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 656 "LIKE": lambda self: self._parse_create_like(), 657 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 658 "LOCK": lambda self: self._parse_locking(), 659 "LOCKING": lambda self: self._parse_locking(), 660 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 661 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 662 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 663 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 664 "NO": lambda self: self._parse_no_property(), 665 "ON": lambda self: self._parse_on_property(), 666 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 667 "PARTITION BY": lambda self: self._parse_partitioned_by(), 668 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 669 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 670 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 671 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 672 "RETURNS": lambda self: self._parse_returns(), 673 "ROW": lambda self: self._parse_row(), 674 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 675 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 676 "SETTINGS": lambda self: self.expression( 677 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 678 ), 679 "SORTKEY": lambda self: self._parse_sortkey(), 680 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 681 "STABLE": lambda self: self.expression( 682 exp.StabilityProperty, this=exp.Literal.string("STABLE") 683 ), 684 "STORED": lambda self: self._parse_stored(), 685 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 686 "TEMP": lambda self: 
self.expression(exp.TemporaryProperty), 687 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 688 "TO": lambda self: self._parse_to_table(), 689 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 690 "TTL": lambda self: self._parse_ttl(), 691 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 692 "VOLATILE": lambda self: self._parse_volatile_property(), 693 "WITH": lambda self: self._parse_with_property(), 694 } 695 696 CONSTRAINT_PARSERS = { 697 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 698 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 699 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 700 "CHARACTER SET": lambda self: self.expression( 701 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 702 ), 703 "CHECK": lambda self: self.expression( 704 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 705 ), 706 "COLLATE": lambda self: self.expression( 707 exp.CollateColumnConstraint, this=self._parse_var() 708 ), 709 "COMMENT": lambda self: self.expression( 710 exp.CommentColumnConstraint, this=self._parse_string() 711 ), 712 "COMPRESS": lambda self: self._parse_compress(), 713 "CLUSTERED": lambda self: self.expression( 714 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 715 ), 716 "NONCLUSTERED": lambda self: self.expression( 717 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 718 ), 719 "DEFAULT": lambda self: self.expression( 720 exp.DefaultColumnConstraint, this=self._parse_bitwise() 721 ), 722 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 723 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 724 "FORMAT": lambda self: self.expression( 725 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 726 ), 727 "GENERATED": lambda self: 
self._parse_generated_as_identity(), 728 "IDENTITY": lambda self: self._parse_auto_increment(), 729 "INLINE": lambda self: self._parse_inline(), 730 "LIKE": lambda self: self._parse_create_like(), 731 "NOT": lambda self: self._parse_not_constraint(), 732 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 733 "ON": lambda self: ( 734 self._match(TokenType.UPDATE) 735 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 736 ) 737 or self.expression(exp.OnProperty, this=self._parse_id_var()), 738 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 739 "PRIMARY KEY": lambda self: self._parse_primary_key(), 740 "REFERENCES": lambda self: self._parse_references(match=False), 741 "TITLE": lambda self: self.expression( 742 exp.TitleColumnConstraint, this=self._parse_var_or_string() 743 ), 744 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 745 "UNIQUE": lambda self: self._parse_unique(), 746 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 747 "WITH": lambda self: self.expression( 748 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 749 ), 750 } 751 752 ALTER_PARSERS = { 753 "ADD": lambda self: self._parse_alter_table_add(), 754 "ALTER": lambda self: self._parse_alter_table_alter(), 755 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 756 "DROP": lambda self: self._parse_alter_table_drop(), 757 "RENAME": lambda self: self._parse_alter_table_rename(), 758 } 759 760 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 761 762 NO_PAREN_FUNCTION_PARSERS = { 763 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 764 "CASE": lambda self: self._parse_case(), 765 "IF": lambda self: self._parse_if(), 766 "NEXT": lambda self: self._parse_next_value_for(), 767 } 768 769 INVALID_FUNC_NAME_TOKENS = { 770 
TokenType.IDENTIFIER, 771 TokenType.STRING, 772 } 773 774 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 775 776 FUNCTION_PARSERS = { 777 "ANY_VALUE": lambda self: self._parse_any_value(), 778 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 779 "CONCAT": lambda self: self._parse_concat(), 780 "CONCAT_WS": lambda self: self._parse_concat_ws(), 781 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 782 "DECODE": lambda self: self._parse_decode(), 783 "EXTRACT": lambda self: self._parse_extract(), 784 "JSON_OBJECT": lambda self: self._parse_json_object(), 785 "LOG": lambda self: self._parse_logarithm(), 786 "MATCH": lambda self: self._parse_match_against(), 787 "OPENJSON": lambda self: self._parse_open_json(), 788 "POSITION": lambda self: self._parse_position(), 789 "SAFE_CAST": lambda self: self._parse_cast(False), 790 "STRING_AGG": lambda self: self._parse_string_agg(), 791 "SUBSTRING": lambda self: self._parse_substring(), 792 "TRIM": lambda self: self._parse_trim(), 793 "TRY_CAST": lambda self: self._parse_cast(False), 794 "TRY_CONVERT": lambda self: self._parse_convert(False), 795 } 796 797 QUERY_MODIFIER_PARSERS = { 798 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 799 TokenType.WHERE: lambda self: ("where", self._parse_where()), 800 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 801 TokenType.HAVING: lambda self: ("having", self._parse_having()), 802 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 803 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 804 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 805 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 806 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 807 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 808 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 809 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 810 
TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 811 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 812 TokenType.CLUSTER_BY: lambda self: ( 813 "cluster", 814 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 815 ), 816 TokenType.DISTRIBUTE_BY: lambda self: ( 817 "distribute", 818 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 819 ), 820 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 821 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 822 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 823 } 824 825 SET_PARSERS = { 826 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 827 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 828 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 829 "TRANSACTION": lambda self: self._parse_set_transaction(), 830 } 831 832 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 833 834 TYPE_LITERAL_PARSERS = { 835 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 836 } 837 838 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 839 840 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 841 842 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 843 844 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 845 TRANSACTION_CHARACTERISTICS = { 846 "ISOLATION LEVEL REPEATABLE READ", 847 "ISOLATION LEVEL READ COMMITTED", 848 "ISOLATION LEVEL READ UNCOMMITTED", 849 "ISOLATION LEVEL SERIALIZABLE", 850 "READ WRITE", 851 "READ ONLY", 852 } 853 854 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 855 856 CLONE_KEYWORDS = {"CLONE", "COPY"} 857 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 858 859 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 860 861 TABLE_INDEX_HINT_TOKENS = 
{TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 862 863 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 864 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 865 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 866 867 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 868 869 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 870 871 DISTINCT_TOKENS = {TokenType.DISTINCT} 872 873 NULL_TOKENS = {TokenType.NULL} 874 875 STRICT_CAST = True 876 877 # A NULL arg in CONCAT yields NULL by default 878 CONCAT_NULL_OUTPUTS_STRING = False 879 880 PREFIXED_PIVOT_COLUMNS = False 881 IDENTIFY_PIVOT_STRINGS = False 882 883 LOG_BASE_FIRST = True 884 LOG_DEFAULTS_TO_LN = False 885 886 # Whether or not ADD is present for each column added by ALTER TABLE 887 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 888 889 # Whether or not the table sample clause expects CSV syntax 890 TABLESAMPLE_CSV = False 891 892 # Whether or not the SET command needs a delimiter (e.g. 
"=") for assignments 893 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 894 895 # Whether the TRIM function expects the characters to trim as its first argument 896 TRIM_PATTERN_FIRST = False 897 898 __slots__ = ( 899 "error_level", 900 "error_message_context", 901 "max_errors", 902 "sql", 903 "errors", 904 "_tokens", 905 "_index", 906 "_curr", 907 "_next", 908 "_prev", 909 "_prev_comments", 910 "_tokenizer", 911 ) 912 913 # Autofilled 914 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 915 INDEX_OFFSET: int = 0 916 UNNEST_COLUMN_ONLY: bool = False 917 ALIAS_POST_TABLESAMPLE: bool = False 918 STRICT_STRING_CONCAT = False 919 SUPPORTS_USER_DEFINED_TYPES = True 920 NORMALIZE_FUNCTIONS = "upper" 921 NULL_ORDERING: str = "nulls_are_small" 922 SHOW_TRIE: t.Dict = {} 923 SET_TRIE: t.Dict = {} 924 FORMAT_MAPPING: t.Dict[str, str] = {} 925 FORMAT_TRIE: t.Dict = {} 926 TIME_MAPPING: t.Dict[str, str] = {} 927 TIME_TRIE: t.Dict = {} 928 929 def __init__( 930 self, 931 error_level: t.Optional[ErrorLevel] = None, 932 error_message_context: int = 100, 933 max_errors: int = 3, 934 ): 935 self.error_level = error_level or ErrorLevel.IMMEDIATE 936 self.error_message_context = error_message_context 937 self.max_errors = max_errors 938 self._tokenizer = self.TOKENIZER_CLASS() 939 self.reset() 940 941 def reset(self): 942 self.sql = "" 943 self.errors = [] 944 self._tokens = [] 945 self._index = 0 946 self._curr = None 947 self._next = None 948 self._prev = None 949 self._prev_comments = None 950 951 def parse( 952 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 953 ) -> t.List[t.Optional[exp.Expression]]: 954 """ 955 Parses a list of tokens and returns a list of syntax trees, one tree 956 per parsed SQL statement. 957 958 Args: 959 raw_tokens: The list of tokens. 960 sql: The original SQL string, used to produce helpful debug messages. 961 962 Returns: 963 The list of the produced syntax trees. 
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag each failure with the type that was being attempted, then
                # keep trying the remaining candidate types.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # No candidate type parsed successfully: surface all collected errors,
        # chained to the most recent failure.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Core driver shared by `parse` and `parse_into`: resets state, splits the
        # token stream into one chunk per statement, and applies `parse_method`
        # to each chunk.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon does not open a new (empty) chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # _advance() moves from -1 onto the first token of the chunk.
            self._index = -1
            self._tokens = tokens
            self._advance()

1031 expressions.append(parse_method(self)) 1032 1033 if self._index < len(self._tokens): 1034 self.raise_error("Invalid expression / Unexpected token") 1035 1036 self.check_errors() 1037 1038 return expressions 1039 1040 def check_errors(self) -> None: 1041 """Logs or raises any found errors, depending on the chosen error level setting.""" 1042 if self.error_level == ErrorLevel.WARN: 1043 for error in self.errors: 1044 logger.error(str(error)) 1045 elif self.error_level == ErrorLevel.RAISE and self.errors: 1046 raise ParseError( 1047 concat_messages(self.errors, self.max_errors), 1048 errors=merge_errors(self.errors), 1049 ) 1050 1051 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1052 """ 1053 Appends an error in the list of recorded errors or raises it, depending on the chosen 1054 error level setting. 1055 """ 1056 token = token or self._curr or self._prev or Token.string("") 1057 start = token.start 1058 end = token.end + 1 1059 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1060 highlight = self.sql[start:end] 1061 end_context = self.sql[end : end + self.error_message_context] 1062 1063 error = ParseError.new( 1064 f"{message}. Line {token.line}, Col: {token.col}.\n" 1065 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1066 description=message, 1067 line=token.line, 1068 col=token.col, 1069 start_context=start_context, 1070 highlight=highlight, 1071 end_context=end_context, 1072 ) 1073 1074 if self.error_level == ErrorLevel.IMMEDIATE: 1075 raise error 1076 1077 self.errors.append(error) 1078 1079 def expression( 1080 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1081 ) -> E: 1082 """ 1083 Creates a new, validated Expression. 1084 1085 Args: 1086 exp_class: The expression class to instantiate. 1087 comments: An optional list of comments to attach to the expression. 1088 kwargs: The arguments to set for the expression along with their respective values. 

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Attaches the buffered leading comments to `expression` and clears the buffer."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL text spanned by the two tokens."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Moves the cursor `times` tokens forward, refreshing _curr/_next/_prev state."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the cursor back to the absolute token position `index`."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Wraps the previous token plus the rest of the statement into a generic Command."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON <kind> <object> IS <string>.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown object kind: fall back to an opaque Command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses a (possibly schema-qualified) table name into a ToTableProperty."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action
            # (DELETE / RECOMPRESS / TO DISK / TO VOLUME).
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement: dispatches on the registered statement parsers,
        then commands, then falls back to an expression or SELECT."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        # Parses DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <name> [...modifiers].
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Consumes IF [NOT] EXISTS if present; truthy iff the full sequence matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses CREATE statements (tables, views, functions, indexes, ...)."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # CREATE TABLE FUNCTION: skip the TABLE token, FUNCTION is the creatable.
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Merges newly parsed properties into the running Properties node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            # Snowflake-style CLONE/COPY clause with optional AT/BEFORE offsets.
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect the optional modifier keywords that may precede the property name.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only pass the modifiers that were actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single table/DDL property, or a generic key=value Property."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Fallback: try a generic <column> = <value> property, backtracking on failure.
        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED AS <format> (incl. Hive INPUTFORMAT/OUTPUTFORMAT pairs)."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        """Parses an optional '=' / 'AS' followed by a field into `exp_class`."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parses consecutive properties until one fails; None if none were found."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parses [NO] FALLBACK [PROTECTION]."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE is ambiguous: disambiguate based on the token two positions back.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parses the various WITH ... property forms (parenthesized list, JOURNAL, DATA, ...)."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parses WITH JOURNAL TABLE = <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parses [NO] LOG."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Builds a JournalProperty from modifiers collected by the caller."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses CHECKSUM = ON | OFF | DEFAULT."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        """Parses a CLUSTER BY list of ordered expressions."""
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parses CLUSTERED BY (cols) [SORTED BY (...)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parses COPY GRANTS; backtracks one token if GRANTS doesn't follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parses FREESPACE = <number> [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parses MERGEBLOCKRATIO, either as '= <n> [PERCENT]' or bare with NO/DEFAULT."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parses DATABLOCKSIZE = <n> [BYTES|KBYTES|KILOBYTES]."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parses BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parses WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parses a LOCKING clause: object kind, optional name, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parses PARTITION BY <exprs>, returning [] when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parses PARTITIONED BY [=] <schema or bracketed field>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parses WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """Parses NO PRIMARY INDEX; None when the sequence doesn't follow."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parses ON COMMIT PRESERVE|DELETE ROWS, else a generic ON <schema> property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parses DISTKEY (<identifier>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parses LIKE <table> [INCLUDING|EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parses [COMPOUND] SORTKEY (<ids>)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parses [DEFAULT] CHARACTER SET [=] <name>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parses a RETURNS clause: either TABLE<...> / TABLE(...) or a scalar type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parses DESCRIBE [<kind>] <table> [properties]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        """Parses INSERT (incl. OVERWRITE, DIRECTORY, OR <alternative> and ON CONFLICT forms)."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parses KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parses ON CONFLICT ... DO NOTHING/UPDATE or ON DUPLICATE KEY UPDATE."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parses RETURNING <exprs> [INTO <target>]; None if RETURNING is absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses ROW FORMAT ... when FORMAT follows; None otherwise."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses the Hive ROW FORMAT clause: SERDE '<class>' or DELIMITED options."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parses LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; else a generic Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parses UPDATE <table> SET <assignments> [FROM][WHERE][RETURNING][ORDER][LIMIT]."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parses UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parses CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parses PARTITION (<exprs>); None if PARTITION is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parses one VALUES row, parenthesized or bare, into a Tuple."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have 
        # VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the projection list of a SELECT."""
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a query: WITH-prefixed statement, SELECT, parenthesized
        subquery/pivot, VALUES, or a bare leading FROM.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a bare table inside parentheses.
            parse_subquery_alias: parse a trailing alias on parenthesized subqueries.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                # raise_error may be non-fatal depending on the error level,
                # hence the fallback returns below it.
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            # Bare "FROM x" becomes "SELECT * FROM x".
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTE list."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> [(<columns>)] AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse "[AS] <alias> [(col, ...)]"; None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Backtrack if the parenthesis did not introduce a column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery node, consuming trailing pivots and alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing joins, laterals and other query modifiers to `this`."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A "LIMIT x, y" style offset is hoisted into its own node.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint block terminated by "*/"."""
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse "INTO [TEMPORARY | UNLOGGED] [TABLE] <table>"."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; `skip_from_token` when FROM was already consumed."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        # ROWS PER MATCH options are stored as plain variables (free text).
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan the raw token stream until the matching right parenthesis,
            # tracking nesting depth; the pattern is kept as free text.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL, CROSS APPLY or OUTER APPLY constructs."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: LATERAL over an unnest, a function call,
            # or a possibly dotted identifier chain.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join, each possibly None."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a JOIN clause, including comma joins and APPLY variants."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join; undo the speculative token consumption.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY,
            False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY behaves like a LEFT join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Nested joins may precede the ON/USING of this join; try that
            # shape and backtrack if it does not pan out.
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by an operator class name."""
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        opclass = self._parse_var(any_token=True)
        if opclass:
            return self.expression(exp.Opclass, this=this, expression=opclass)

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        Args:
            index: when given, the index name was already parsed and an
                "ON <table>" clause is expected instead.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table hints following a table reference; None when absent."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a (possibly qualified) table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or
            self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a qualified table name: [catalog.][db.]table[.more ...]."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like factor: lateral, unnest, values, subquery,
        bracketed expression or plain (qualified) table name."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place TABLESAMPLE before the alias, others after.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a time-travel / snapshot clause following a table name."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse "UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET ...]"."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
self.raise_error("Unexpected extra column alias in unnest.") 2695 2696 alias.set("columns", [alias.this]) 2697 alias.set("this", None) 2698 2699 columns = alias.args.get("columns") or [] 2700 if offset and len(expressions) < len(columns): 2701 offset = columns.pop() 2702 2703 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2704 self._match(TokenType.ALIAS) 2705 offset = self._parse_id_var() or exp.to_identifier("offset") 2706 2707 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2708 2709 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2710 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2711 if not is_derived and not self._match(TokenType.VALUES): 2712 return None 2713 2714 expressions = self._parse_csv(self._parse_value) 2715 alias = self._parse_table_alias() 2716 2717 if is_derived: 2718 self._match_r_paren() 2719 2720 return self.expression( 2721 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2722 ) 2723 2724 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2725 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2726 as_modifier and self._match_text_seq("USING", "SAMPLE") 2727 ): 2728 return None 2729 2730 bucket_numerator = None 2731 bucket_denominator = None 2732 bucket_field = None 2733 percent = None 2734 rows = None 2735 size = None 2736 seed = None 2737 2738 kind = ( 2739 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2740 ) 2741 method = self._parse_var(tokens=(TokenType.ROW,)) 2742 2743 matched_l_paren = self._match(TokenType.L_PAREN) 2744 2745 if self.TABLESAMPLE_CSV: 2746 num = None 2747 expressions = self._parse_csv(self._parse_primary) 2748 else: 2749 expressions = None 2750 num = ( 2751 self._parse_factor() 2752 if self._match(TokenType.NUMBER, advance=False) 2753 else self._parse_primary() 2754 ) 2755 2756 if self._match_text_seq("BUCKET"): 
2757 bucket_numerator = self._parse_number() 2758 self._match_text_seq("OUT", "OF") 2759 bucket_denominator = bucket_denominator = self._parse_number() 2760 self._match(TokenType.ON) 2761 bucket_field = self._parse_field() 2762 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2763 percent = num 2764 elif self._match(TokenType.ROWS): 2765 rows = num 2766 elif num: 2767 size = num 2768 2769 if matched_l_paren: 2770 self._match_r_paren() 2771 2772 if self._match(TokenType.L_PAREN): 2773 method = self._parse_var() 2774 seed = self._match(TokenType.COMMA) and self._parse_number() 2775 self._match_r_paren() 2776 elif self._match_texts(("SEED", "REPEATABLE")): 2777 seed = self._parse_wrapped(self._parse_number) 2778 2779 return self.expression( 2780 exp.TableSample, 2781 expressions=expressions, 2782 method=method, 2783 bucket_numerator=bucket_numerator, 2784 bucket_denominator=bucket_denominator, 2785 bucket_field=bucket_field, 2786 percent=percent, 2787 rows=rows, 2788 size=size, 2789 seed=seed, 2790 kind=kind, 2791 ) 2792 2793 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2794 return list(iter(self._parse_pivot, None)) or None 2795 2796 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2797 return list(iter(self._parse_join, None)) or None 2798 2799 # https://duckdb.org/docs/sql/statements/pivot 2800 def _parse_simplified_pivot(self) -> exp.Pivot: 2801 def _parse_on() -> t.Optional[exp.Expression]: 2802 this = self._parse_bitwise() 2803 return self._parse_in(this) if self._match(TokenType.IN) else this 2804 2805 this = self._parse_table() 2806 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2807 using = self._match(TokenType.USING) and self._parse_csv( 2808 lambda: self._parse_alias(self._parse_function()) 2809 ) 2810 group = self._parse_group() 2811 return self.expression( 2812 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2813 ) 2814 2815 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2816 
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Not a pivot after all; undo the speculative token consumption.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names produced by the pivot.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialect hook)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; None when WHERE is absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including GROUPING SETS / ROLLUP / CUBE / TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE store True; the parenthesized forms
            # store the parsed column list instead.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse "GROUPING SETS (...)"; None when absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; None when HAVING is absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause; None when QUALIFY is absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY hierarchies."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # PRIOR is only a function while parsing the CONNECT BY condition,
        # so register it temporarily and remove it right after.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; returns `this` unchanged when ORDER BY is absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a sort-like clause introduced by `token` into `exp_class`."""
        if not self._match(token):
            return None
        return self.expression(exp_class,
            expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        """Parse one ORDER BY term: expr [ASC|DESC] [NULLS FIRST|LAST]."""
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Without an explicit NULLS clause, derive the null ordering from the
        # dialect's NULL_ORDERING setting and the sort direction.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top`) and FETCH FIRST/NEXT clauses.

        Returns `this` unchanged when no limiting clause is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            # MySQL-style "LIMIT offset, count".
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse "OFFSET n [ROW|ROWS]"; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses (FOR UPDATE / FOR SHARE / ...)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains on top of `this`."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is explicitly given.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a conjunction with an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-level expressions."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level expressions."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level expressions."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the right side of IS: [NOT] DISTINCT FROM, NULL, TRUE/FALSE.

        Retreats and returns None when no valid operand follows, so the caller
        can treat the IS token as not-consumed.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM == null-safe equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the IN operand: UNNEST(...), a parenthesized subquery or
        expression list, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse `<low> AND <high>` after a BETWEEN token."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap *this* in an ESCAPE clause when one follows (LIKE ... ESCAPE 'c')."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing to the form INTERVAL '<n>' <unit>."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                # The string carried both value and unit, e.g. INTERVAL '5 day'.
                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, `??` (coalesce) and << / >> shifts
        (tokenized as LT LT / GT GT pairs) over term-level operands."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (TERM set) over factors."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (FACTOR set) over unary operands."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, then a typed operand with AT TIME ZONE."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse INTERVAL literals, `TYPE 'literal'` casts, or fall back to a
        plain column expression. Retreats when the type guess doesn't pan out."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' — dialect-specific literal parsers first.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parenthesized type parameter, e.g. the 10 in DECIMAL(10, 2)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type into exp.DataType.

        Args:
            check_func: retreat if the "type" is followed by a string argument and
                could actually be a function call of the same name.
            schema: propagated into nested type parsing (column-def context).
            allow_identifiers: allow a plain identifier to be re-tokenized as a
                type keyword, or treated as a user-defined type when supported.
        """
        index = self._index

        # Teradata allows types to be prefixed with SYSUDTLIB.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Parenthesized args mean this could still be a function call.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # e.g. INTERVAL DAY TO SECOND.
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # A string follows, so this was a function call, not a type.
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing []s wrap the type in ARRAY, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member (`name: type` or `name type`) as a column def."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap *this* in AT TIME ZONE <zone> when that phrase follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference (promoting a bare identifier to exp.Column),
        then any trailing column operators / brackets."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        # Fold trailing column operators (dots, ::-casts, JSON arrows, ...)
        # and subscripts onto *this*.
        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers left: column -> table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (with implicit adjacent-string
        concatenation), a leading-dot number like `.5`, or a parenthesized
        expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Resolution order: dialect no-paren parsers, no-paren builtins
        (e.g. CURRENT_DATE), special-cased FUNCTION_PARSERS, subquery
        predicates (EXISTS/ANY/...), then known FUNCTIONS; anything else
        becomes exp.Anonymous. Returns None when the upcoming tokens do
        not form a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Preserve the original casing of the function name.
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function signature."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'x'); fall back to identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally `kind.name` qualified."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda `(x, y) -> expr` / `x -> expr`; otherwise fall back to
        DISTINCT or a regular select/expression argument."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda — rewind and parse a normal argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list) attached to *this*.

        First speculatively tries a nested SELECT (e.g. CREATE TABLE AS) and
        rewinds if that's what follows, clearing any speculative errors.
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse one schema entry as a column definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, optional computed
        (AS ...) expression, and any number of column constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... parameters."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, wrapped or single-expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
    def _parse_generated_as_identity(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS [IDENTITY]
        with optional sequence options or a computed expression."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>) — a computed column.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] <n> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse what follows NOT in a constraint: NULL, CASESPECIFIC or
        FOR REPLICATION. Returns None if none of them match."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint via CONSTRAINT_PARSERS."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named table constraint (CONSTRAINT <name> ...); without the
        CONSTRAINT keyword, fall back to an unnamed schema constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint whose keyword is in *constraints*
        (defaults to CONSTRAINT_PARSERS)."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(columns)] [USING <index_type>]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint option strings: ON <event> <action>,
        NOT ENFORCED, DEFERRABLE, INITIALLY DEFERRED, NORELY, MATCH FULL."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
action = "CASCADE" 3909 elif self._match_text_seq("RESTRICT"): 3910 action = "RESTRICT" 3911 elif self._match_pair(TokenType.SET, TokenType.NULL): 3912 action = "SET NULL" 3913 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3914 action = "SET DEFAULT" 3915 else: 3916 self.raise_error("Invalid key constraint") 3917 3918 options.append(f"ON {on} {action}") 3919 elif self._match_text_seq("NOT", "ENFORCED"): 3920 options.append("NOT ENFORCED") 3921 elif self._match_text_seq("DEFERRABLE"): 3922 options.append("DEFERRABLE") 3923 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3924 options.append("INITIALLY DEFERRED") 3925 elif self._match_text_seq("NORELY"): 3926 options.append("NORELY") 3927 elif self._match_text_seq("MATCH", "FULL"): 3928 options.append("MATCH FULL") 3929 else: 3930 break 3931 3932 return options 3933 3934 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3935 if match and not self._match(TokenType.REFERENCES): 3936 return None 3937 3938 expressions = None 3939 this = self._parse_table(schema=True) 3940 options = self._parse_key_constraint_options() 3941 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3942 3943 def _parse_foreign_key(self) -> exp.ForeignKey: 3944 expressions = self._parse_wrapped_id_vars() 3945 reference = self._parse_references() 3946 options = {} 3947 3948 while self._match(TokenType.ON): 3949 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3950 self.raise_error("Expected DELETE or UPDATE") 3951 3952 kind = self._prev.text.lower() 3953 3954 if self._match_text_seq("NO", "ACTION"): 3955 action = "NO ACTION" 3956 elif self._match(TokenType.SET): 3957 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3958 action = "SET " + self._prev.text.upper() 3959 else: 3960 self._advance() 3961 action = self._prev.text.upper() 3962 3963 options[kind] = action 3964 3965 return self.expression( 3966 exp.ForeignKey, expressions=expressions, 
    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY (...) column list."""
        return self._parse_field()

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse a PRIMARY KEY constraint.

        Returns a column-level `PrimaryKeyColumnConstraint` when no parenthesized
        column list follows (and we're not in a properties context); otherwise a
        table-level `PrimaryKey` with its column list and options.
        """
        # Optional ASC/DESC after PRIMARY KEY; desc is True only for DESC.
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # No "(" ahead -> this is a column-level constraint, not a key list.
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` subscripts / array literals and `{...}` struct literals.

        Recurses on itself at the end so chained subscripts (a[1][2]) are handled.
        Returns `this` unchanged when no opening bracket/brace follows.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. x[:5].
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            # Bare `[...]` or `ARRAY[...]` is an array literal.
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript access: normalize indices by the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice if a ':' follows (e.g. x[1:5]); else return it as-is."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END into exp.Case."""
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional operand for the "simple" CASE form; None for searched CASE.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        # CASE may be followed by an OVER clause in some dialects.
        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF: either function form IF(cond, t, f) or the
        statement-like form IF cond THEN t [ELSE f] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not actually an IF expression; rewind so the caller can retry.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)] (T-SQL sequences)."""
        if not self._match_text_seq("VALUE", "FOR"):
            # NEXT was consumed by the caller; step back over it.
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )
    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr) — also tolerating a comma separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING MAX|MIN col]) (e.g. BigQuery)."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]).

        Args:
            strict: True builds exp.Cast, False builds exp.TryCast.

        Handles the Teradata-style `CAST(x, 'type string')` comma form
        (CastToStrType), user-defined types, CHAR ... CHARACTER SET, and the
        FORMAT clause, which for temporal targets is rewritten into
        StrToDate / StrToTime.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # An identifier here means a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST(x AS DATE FORMAT '...') becomes STR_TO_DATE/STR_TO_TIME
                # with the format translated into the dialect's time mapping.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT(...) arguments, honoring NULL-handling dialect flags."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT_WS(sep, ...): first argument is the separator."""
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Only the values, not the delimiter, get NULL coalescing.
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style aggregates, including the
        Postgres ORDER BY-inside-the-call and WITHIN GROUP variants."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast.

        Args:
            strict: True builds exp.Cast, False builds exp.TryCast.
        """
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # DECODE treats NULL as matching NULL, unlike `=`.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: match on equality OR both sides NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one [KEY] k [:|,] [VALUE] v pair inside JSON_OBJECT(...)."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson when followed by FORMAT JSON; else pass through."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None
    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) with its optional NULL handling, UNIQUE KEYS,
        RETURNING and ENCODING clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # WITH/WITHOUT UNIQUE [KEYS]; None when neither was specified.
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG(...) arguments, normalizing base/expression order and the
        one-argument default (LN vs LOG) per dialect flags."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL MATCH(cols) AGAINST('query' [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause entry: name, type, optional path and AS JSON flag.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(needle IN haystack) or the comma-argument form.

        Args:
            haystack_first: dialect flag — True when the comma form is
                (haystack, needle) rather than (needle, haystack).
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list into a JoinHint node (name upper-cased)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)
    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        """Parse TRIM([LEADING|TRAILING|BOTH] [chars FROM] expr [COLLATE c])."""

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or pattern-first dialects) the first operand is the
            # characters to trim and the second is the string — swap them.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause (named window definitions), if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if that modifier follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function suffix of an expression: FILTER (...),
        WITHIN GROUP (...), IGNORE/RESPECT NULLS, and OVER (...) or a named
        window reference.

        Args:
            this: the expression (usually a function call) being windowed.
            alias: True when parsing a WINDOW-clause definition
                (`name AS (spec)`), in which case there is no OVER keyword.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <name>: reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window specification."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED/CURRENT ROW/expr plus PRECEDING/FOLLOWING."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias ([AS] name or (a, b, ...)) following `this`.

        Args:
            explicit: when True, only accept an alias introduced by AS.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or an identifier-like token.

        Args:
            any_token: accept any non-reserved token as an identifier.
            tokens: token set to accept instead of the default ID_VAR_TOKENS.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder token."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()
    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal as a quoted identifier (None if no string follows)."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder token."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any non-reserved / listed token) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it's a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a VAR token or a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, falling back to a placeholder token."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE/FALSE literals, falling back to a placeholder token."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse `*`, falling back to a placeholder token."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally brace-wrapped (e.g. @{x})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder (?, :name, @var, ...) via PLACEHOLDER_PARSERS.

        Rewinds the token stream when the matched parser yields nothing.
        """
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `* EXCEPT (cols)` / `* EXCEPT col` column exclusions."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `* REPLACE (expr AS col, ...)` column replacements."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a separator-delimited list using `parse_method` for each item.

        None results are skipped; comments before a separator are attached to
        the preceding item.
        """
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of binary operators.

        Args:
            parse_method: parser for each operand.
            expressions: maps operator TokenTypes to expression classes.
        """
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this
    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized separator-delimited list.

        Args:
            optional: when True, the surrounding parentheses may be absent.
        """
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside (optionally required) parentheses."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated expression list."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement or, failing that, a (possibly aliased) expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE AS ...)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [modes]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        # Transaction modes are comma-separated runs of VAR tokens,
        # e.g. "READ ONLY", "ISOLATION LEVEL SERIALIZABLE".
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT x] [AND [NO] CHAIN].

        The COMMIT/ROLLBACK keyword itself was consumed by the caller
        (dispatch is on self._prev).
        """
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <def> [FIRST|AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse ALTER TABLE ... DROP [COLUMN] col, defaulting kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse ALTER TABLE ... DROP [IF EXISTS] PARTITION spec(s)."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )
    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse ALTER TABLE ... ADD CONSTRAINT/CHECK/FOREIGN KEY/PRIMARY KEY.

        Dispatches on self._prev (the token the caller matched) to decide which
        constraint form follows.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD action list of an ALTER TABLE (constraints or columns)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        # Some dialects allow ADD without repeating COLUMN for each field.
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN] col {DROP DEFAULT | SET DEFAULT expr |
        [SET DATA] TYPE type [COLLATE c] [USING expr]}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP action list of an ALTER TABLE (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE, falling back to a raw Command for anything the
        action parsers can't fully consume."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable if every token was consumed; otherwise
            # preserve the statement verbatim as a Command.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON cond WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, None otherwise.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
"SOURCE") 4938 ) 4939 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4940 4941 self._match(TokenType.THEN) 4942 4943 if self._match(TokenType.INSERT): 4944 _this = self._parse_star() 4945 if _this: 4946 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4947 else: 4948 then = self.expression( 4949 exp.Insert, 4950 this=self._parse_value(), 4951 expression=self._match(TokenType.VALUES) and self._parse_value(), 4952 ) 4953 elif self._match(TokenType.UPDATE): 4954 expressions = self._parse_star() 4955 if expressions: 4956 then = self.expression(exp.Update, expressions=expressions) 4957 else: 4958 then = self.expression( 4959 exp.Update, 4960 expressions=self._match(TokenType.SET) 4961 and self._parse_csv(self._parse_equality), 4962 ) 4963 elif self._match(TokenType.DELETE): 4964 then = self.expression(exp.Var, this=self._prev.text) 4965 else: 4966 then = None 4967 4968 whens.append( 4969 self.expression( 4970 exp.When, 4971 matched=matched, 4972 source=source, 4973 condition=condition, 4974 then=then, 4975 ) 4976 ) 4977 4978 return self.expression( 4979 exp.Merge, 4980 this=target, 4981 using=using, 4982 on=on, 4983 expressions=whens, 4984 ) 4985 4986 def _parse_show(self) -> t.Optional[exp.Expression]: 4987 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4988 if parser: 4989 return parser(self) 4990 return self._parse_as_command(self._prev) 4991 4992 def _parse_set_item_assignment( 4993 self, kind: t.Optional[str] = None 4994 ) -> t.Optional[exp.Expression]: 4995 index = self._index 4996 4997 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4998 return self._parse_set_transaction(global_=kind == "GLOBAL") 4999 5000 left = self._parse_primary() or self._parse_id_var() 5001 assignment_delimiter = self._match_texts(("=", "TO")) 5002 5003 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5004 self._retreat(index) 5005 return None 5006 5007 
right = self._parse_statement() or self._parse_id_var() 5008 this = self.expression(exp.EQ, this=left, expression=right) 5009 5010 return self.expression(exp.SetItem, this=this, kind=kind) 5011 5012 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5013 self._match_text_seq("TRANSACTION") 5014 characteristics = self._parse_csv( 5015 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5016 ) 5017 return self.expression( 5018 exp.SetItem, 5019 expressions=characteristics, 5020 kind="TRANSACTION", 5021 **{"global": global_}, # type: ignore 5022 ) 5023 5024 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5025 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5026 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5027 5028 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5029 index = self._index 5030 set_ = self.expression( 5031 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5032 ) 5033 5034 if self._curr: 5035 self._retreat(index) 5036 return self._parse_as_command(self._prev) 5037 5038 return set_ 5039 5040 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5041 for option in options: 5042 if self._match_text_seq(*option.split(" ")): 5043 return exp.var(option) 5044 return None 5045 5046 def _parse_as_command(self, start: Token) -> exp.Command: 5047 while self._curr: 5048 self._advance() 5049 text = self._find_sql(start, self._prev) 5050 size = len(start.text) 5051 return exp.Command(this=text[:size], expression=text[size:]) 5052 5053 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5054 settings = [] 5055 5056 self._match_l_paren() 5057 kind = self._parse_id_var() 5058 5059 if self._match(TokenType.L_PAREN): 5060 while True: 5061 key = self._parse_id_var() 5062 value = self._parse_primary() 5063 5064 if not key and value is None: 5065 break 5066 
settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5067 self._match(TokenType.R_PAREN) 5068 5069 self._match_r_paren() 5070 5071 return self.expression( 5072 exp.DictProperty, 5073 this=this, 5074 kind=kind.this if kind else None, 5075 settings=settings, 5076 ) 5077 5078 def _parse_dict_range(self, this: str) -> exp.DictRange: 5079 self._match_l_paren() 5080 has_min = self._match_text_seq("MIN") 5081 if has_min: 5082 min = self._parse_var() or self._parse_primary() 5083 self._match_text_seq("MAX") 5084 max = self._parse_var() or self._parse_primary() 5085 else: 5086 max = self._parse_var() or self._parse_primary() 5087 min = exp.Literal.number(0) 5088 self._match_r_paren() 5089 return self.expression(exp.DictRange, this=this, min=min, max=max) 5090 5091 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5092 index = self._index 5093 expression = self._parse_column() 5094 if not self._match(TokenType.IN): 5095 self._retreat(index - 1) 5096 return None 5097 iterator = self._parse_column() 5098 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5099 return self.expression( 5100 exp.Comprehension, 5101 this=this, 5102 expression=expression, 5103 iterator=iterator, 5104 condition=condition, 5105 ) 5106 5107 def _find_parser( 5108 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5109 ) -> t.Optional[t.Callable]: 5110 if not self._curr: 5111 return None 5112 5113 index = self._index 5114 this = [] 5115 while True: 5116 # The current token might be multiple words 5117 curr = self._curr.text.upper() 5118 key = curr.split(" ") 5119 this.append(curr) 5120 5121 self._advance() 5122 result, trie = in_trie(trie, key) 5123 if result == TrieResult.FAILED: 5124 break 5125 5126 if result == TrieResult.EXISTS: 5127 subparser = parsers[" ".join(this)] 5128 return subparser 5129 5130 self._retreat(index) 5131 return None 5132 5133 def _match(self, token_type, advance=True, expression=None): 
5134 if not self._curr: 5135 return None 5136 5137 if self._curr.token_type == token_type: 5138 if advance: 5139 self._advance() 5140 self._add_comments(expression) 5141 return True 5142 5143 return None 5144 5145 def _match_set(self, types, advance=True): 5146 if not self._curr: 5147 return None 5148 5149 if self._curr.token_type in types: 5150 if advance: 5151 self._advance() 5152 return True 5153 5154 return None 5155 5156 def _match_pair(self, token_type_a, token_type_b, advance=True): 5157 if not self._curr or not self._next: 5158 return None 5159 5160 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5161 if advance: 5162 self._advance(2) 5163 return True 5164 5165 return None 5166 5167 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5168 if not self._match(TokenType.L_PAREN, expression=expression): 5169 self.raise_error("Expecting (") 5170 5171 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5172 if not self._match(TokenType.R_PAREN, expression=expression): 5173 self.raise_error("Expecting )") 5174 5175 def _match_texts(self, texts, advance=True): 5176 if self._curr and self._curr.text.upper() in texts: 5177 if advance: 5178 self._advance() 5179 return True 5180 return False 5181 5182 def _match_text_seq(self, *texts, advance=True): 5183 index = self._index 5184 for text in texts: 5185 if self._curr and self._curr.text.upper() == text: 5186 self._advance() 5187 else: 5188 self._retreat(index) 5189 return False 5190 5191 if not advance: 5192 self._retreat(index) 5193 5194 return True 5195 5196 @t.overload 5197 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5198 ... 5199 5200 @t.overload 5201 def _replace_columns_with_dots( 5202 self, this: t.Optional[exp.Expression] 5203 ) -> t.Optional[exp.Expression]: 5204 ... 
5205 5206 def _replace_columns_with_dots(self, this): 5207 if isinstance(this, exp.Dot): 5208 exp.replace_children(this, self._replace_columns_with_dots) 5209 elif isinstance(this, exp.Column): 5210 exp.replace_children(this, self._replace_columns_with_dots) 5211 table = this.args.get("table") 5212 this = ( 5213 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5214 ) 5215 5216 return this 5217 5218 def _replace_lambda( 5219 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5220 ) -> t.Optional[exp.Expression]: 5221 if not node: 5222 return node 5223 5224 for column in node.find_all(exp.Column): 5225 if column.parts[0].name in lambda_variables: 5226 dot_or_id = column.to_dot() if column.table else column.this 5227 parent = column.parent 5228 5229 while isinstance(parent, exp.Dot): 5230 if not isinstance(parent.parent, exp.Dot): 5231 parent.replace(dot_or_id) 5232 break 5233 parent = parent.parent 5234 else: 5235 if column is node: 5236 node = dot_or_id 5237 else: 5238 column.replace(dot_or_id) 5239 return node 5240 5241 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5242 return [ 5243 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5244 for value in values 5245 if value 5246 ]
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from a flat argument list.

    A single star argument yields a StarMap; otherwise the arguments are
    interpreted as alternating key/value pairs and collected into a VarMap
    (an odd-length list raises IndexError on the trailing key).
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    pair_starts = range(0, len(args), 2)
    keys = [args[i] for i in pair_starts]
    values = [args[i + 1] for i in pair_starts]

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 
TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMPTZ, 165 TokenType.TIMESTAMPLTZ, 166 TokenType.DATETIME, 167 TokenType.DATETIME64, 168 TokenType.DATE, 169 TokenType.INT4RANGE, 170 TokenType.INT4MULTIRANGE, 171 TokenType.INT8RANGE, 172 TokenType.INT8MULTIRANGE, 173 TokenType.NUMRANGE, 174 TokenType.NUMMULTIRANGE, 175 TokenType.TSRANGE, 176 TokenType.TSMULTIRANGE, 177 TokenType.TSTZRANGE, 178 TokenType.TSTZMULTIRANGE, 179 TokenType.DATERANGE, 180 TokenType.DATEMULTIRANGE, 181 TokenType.DECIMAL, 182 TokenType.UDECIMAL, 183 TokenType.BIGDECIMAL, 184 TokenType.UUID, 185 TokenType.GEOGRAPHY, 186 TokenType.GEOMETRY, 187 TokenType.HLLSKETCH, 188 TokenType.HSTORE, 189 TokenType.PSEUDO_TYPE, 190 TokenType.SUPER, 191 TokenType.SERIAL, 192 TokenType.SMALLSERIAL, 193 TokenType.BIGSERIAL, 194 TokenType.XML, 195 TokenType.YEAR, 196 TokenType.UNIQUEIDENTIFIER, 197 TokenType.USERDEFINED, 198 TokenType.MONEY, 199 TokenType.SMALLMONEY, 200 TokenType.ROWVERSION, 201 TokenType.IMAGE, 202 TokenType.VARIANT, 203 TokenType.OBJECT, 204 TokenType.OBJECT_IDENTIFIER, 205 TokenType.INET, 206 TokenType.IPADDRESS, 207 TokenType.IPPREFIX, 208 TokenType.UNKNOWN, 209 TokenType.NULL, 210 *ENUM_TYPE_TOKENS, 211 
*NESTED_TYPE_TOKENS, 212 } 213 214 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 215 TokenType.BIGINT: TokenType.UBIGINT, 216 TokenType.INT: TokenType.UINT, 217 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 218 TokenType.SMALLINT: TokenType.USMALLINT, 219 TokenType.TINYINT: TokenType.UTINYINT, 220 TokenType.DECIMAL: TokenType.UDECIMAL, 221 } 222 223 SUBQUERY_PREDICATES = { 224 TokenType.ANY: exp.Any, 225 TokenType.ALL: exp.All, 226 TokenType.EXISTS: exp.Exists, 227 TokenType.SOME: exp.Any, 228 } 229 230 RESERVED_KEYWORDS = { 231 *Tokenizer.SINGLE_TOKENS.values(), 232 TokenType.SELECT, 233 } 234 235 DB_CREATABLES = { 236 TokenType.DATABASE, 237 TokenType.SCHEMA, 238 TokenType.TABLE, 239 TokenType.VIEW, 240 TokenType.DICTIONARY, 241 } 242 243 CREATABLES = { 244 TokenType.COLUMN, 245 TokenType.FUNCTION, 246 TokenType.INDEX, 247 TokenType.PROCEDURE, 248 *DB_CREATABLES, 249 } 250 251 # Tokens that can represent identifiers 252 ID_VAR_TOKENS = { 253 TokenType.VAR, 254 TokenType.ANTI, 255 TokenType.APPLY, 256 TokenType.ASC, 257 TokenType.AUTO_INCREMENT, 258 TokenType.BEGIN, 259 TokenType.CACHE, 260 TokenType.CASE, 261 TokenType.COLLATE, 262 TokenType.COMMAND, 263 TokenType.COMMENT, 264 TokenType.COMMIT, 265 TokenType.CONSTRAINT, 266 TokenType.DEFAULT, 267 TokenType.DELETE, 268 TokenType.DESC, 269 TokenType.DESCRIBE, 270 TokenType.DICTIONARY, 271 TokenType.DIV, 272 TokenType.END, 273 TokenType.EXECUTE, 274 TokenType.ESCAPE, 275 TokenType.FALSE, 276 TokenType.FIRST, 277 TokenType.FILTER, 278 TokenType.FORMAT, 279 TokenType.FULL, 280 TokenType.IS, 281 TokenType.ISNULL, 282 TokenType.INTERVAL, 283 TokenType.KEEP, 284 TokenType.KILL, 285 TokenType.LEFT, 286 TokenType.LOAD, 287 TokenType.MERGE, 288 TokenType.NATURAL, 289 TokenType.NEXT, 290 TokenType.OFFSET, 291 TokenType.ORDINALITY, 292 TokenType.OVERLAPS, 293 TokenType.OVERWRITE, 294 TokenType.PARTITION, 295 TokenType.PERCENT, 296 TokenType.PIVOT, 297 TokenType.PRAGMA, 298 TokenType.RANGE, 299 TokenType.REFERENCES, 300 TokenType.RIGHT, 301 
TokenType.ROW, 302 TokenType.ROWS, 303 TokenType.SEMI, 304 TokenType.SET, 305 TokenType.SETTINGS, 306 TokenType.SHOW, 307 TokenType.TEMPORARY, 308 TokenType.TOP, 309 TokenType.TRUE, 310 TokenType.UNIQUE, 311 TokenType.UNPIVOT, 312 TokenType.UPDATE, 313 TokenType.VOLATILE, 314 TokenType.WINDOW, 315 *CREATABLES, 316 *SUBQUERY_PREDICATES, 317 *TYPE_TOKENS, 318 *NO_PAREN_FUNCTIONS, 319 } 320 321 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 322 323 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 324 TokenType.ANTI, 325 TokenType.APPLY, 326 TokenType.ASOF, 327 TokenType.FULL, 328 TokenType.LEFT, 329 TokenType.LOCK, 330 TokenType.NATURAL, 331 TokenType.OFFSET, 332 TokenType.RIGHT, 333 TokenType.SEMI, 334 TokenType.WINDOW, 335 } 336 337 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 338 339 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 340 341 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 342 343 FUNC_TOKENS = { 344 TokenType.COLLATE, 345 TokenType.COMMAND, 346 TokenType.CURRENT_DATE, 347 TokenType.CURRENT_DATETIME, 348 TokenType.CURRENT_TIMESTAMP, 349 TokenType.CURRENT_TIME, 350 TokenType.CURRENT_USER, 351 TokenType.FILTER, 352 TokenType.FIRST, 353 TokenType.FORMAT, 354 TokenType.GLOB, 355 TokenType.IDENTIFIER, 356 TokenType.INDEX, 357 TokenType.ISNULL, 358 TokenType.ILIKE, 359 TokenType.INSERT, 360 TokenType.LIKE, 361 TokenType.MERGE, 362 TokenType.OFFSET, 363 TokenType.PRIMARY_KEY, 364 TokenType.RANGE, 365 TokenType.REPLACE, 366 TokenType.RLIKE, 367 TokenType.ROW, 368 TokenType.UNNEST, 369 TokenType.VAR, 370 TokenType.LEFT, 371 TokenType.RIGHT, 372 TokenType.DATE, 373 TokenType.DATETIME, 374 TokenType.TABLE, 375 TokenType.TIMESTAMP, 376 TokenType.TIMESTAMPTZ, 377 TokenType.WINDOW, 378 TokenType.XOR, 379 *TYPE_TOKENS, 380 *SUBQUERY_PREDICATES, 381 } 382 383 CONJUNCTION = { 384 TokenType.AND: exp.And, 385 TokenType.OR: exp.Or, 386 } 387 388 EQUALITY = { 389 TokenType.EQ: exp.EQ, 390 TokenType.NEQ: exp.NEQ, 391 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 
392 } 393 394 COMPARISON = { 395 TokenType.GT: exp.GT, 396 TokenType.GTE: exp.GTE, 397 TokenType.LT: exp.LT, 398 TokenType.LTE: exp.LTE, 399 } 400 401 BITWISE = { 402 TokenType.AMP: exp.BitwiseAnd, 403 TokenType.CARET: exp.BitwiseXor, 404 TokenType.PIPE: exp.BitwiseOr, 405 TokenType.DPIPE: exp.DPipe, 406 } 407 408 TERM = { 409 TokenType.DASH: exp.Sub, 410 TokenType.PLUS: exp.Add, 411 TokenType.MOD: exp.Mod, 412 TokenType.COLLATE: exp.Collate, 413 } 414 415 FACTOR = { 416 TokenType.DIV: exp.IntDiv, 417 TokenType.LR_ARROW: exp.Distance, 418 TokenType.SLASH: exp.Div, 419 TokenType.STAR: exp.Mul, 420 } 421 422 TIMES = { 423 TokenType.TIME, 424 TokenType.TIMETZ, 425 } 426 427 TIMESTAMPS = { 428 TokenType.TIMESTAMP, 429 TokenType.TIMESTAMPTZ, 430 TokenType.TIMESTAMPLTZ, 431 *TIMES, 432 } 433 434 SET_OPERATIONS = { 435 TokenType.UNION, 436 TokenType.INTERSECT, 437 TokenType.EXCEPT, 438 } 439 440 JOIN_METHODS = { 441 TokenType.NATURAL, 442 TokenType.ASOF, 443 } 444 445 JOIN_SIDES = { 446 TokenType.LEFT, 447 TokenType.RIGHT, 448 TokenType.FULL, 449 } 450 451 JOIN_KINDS = { 452 TokenType.INNER, 453 TokenType.OUTER, 454 TokenType.CROSS, 455 TokenType.SEMI, 456 TokenType.ANTI, 457 } 458 459 JOIN_HINTS: t.Set[str] = set() 460 461 LAMBDAS = { 462 TokenType.ARROW: lambda self, expressions: self.expression( 463 exp.Lambda, 464 this=self._replace_lambda( 465 self._parse_conjunction(), 466 {node.name for node in expressions}, 467 ), 468 expressions=expressions, 469 ), 470 TokenType.FARROW: lambda self, expressions: self.expression( 471 exp.Kwarg, 472 this=exp.var(expressions[0].name), 473 expression=self._parse_conjunction(), 474 ), 475 } 476 477 COLUMN_OPERATORS = { 478 TokenType.DOT: None, 479 TokenType.DCOLON: lambda self, this, to: self.expression( 480 exp.Cast if self.STRICT_CAST else exp.TryCast, 481 this=this, 482 to=to, 483 ), 484 TokenType.ARROW: lambda self, this, path: self.expression( 485 exp.JSONExtract, 486 this=this, 487 expression=path, 488 ), 489 TokenType.DARROW: 
lambda self, this, path: self.expression( 490 exp.JSONExtractScalar, 491 this=this, 492 expression=path, 493 ), 494 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 495 exp.JSONBExtract, 496 this=this, 497 expression=path, 498 ), 499 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 500 exp.JSONBExtractScalar, 501 this=this, 502 expression=path, 503 ), 504 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 505 exp.JSONBContains, 506 this=this, 507 expression=key, 508 ), 509 } 510 511 EXPRESSION_PARSERS = { 512 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 513 exp.Column: lambda self: self._parse_column(), 514 exp.Condition: lambda self: self._parse_conjunction(), 515 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 516 exp.Expression: lambda self: self._parse_statement(), 517 exp.From: lambda self: self._parse_from(), 518 exp.Group: lambda self: self._parse_group(), 519 exp.Having: lambda self: self._parse_having(), 520 exp.Identifier: lambda self: self._parse_id_var(), 521 exp.Join: lambda self: self._parse_join(), 522 exp.Lambda: lambda self: self._parse_lambda(), 523 exp.Lateral: lambda self: self._parse_lateral(), 524 exp.Limit: lambda self: self._parse_limit(), 525 exp.Offset: lambda self: self._parse_offset(), 526 exp.Order: lambda self: self._parse_order(), 527 exp.Ordered: lambda self: self._parse_ordered(), 528 exp.Properties: lambda self: self._parse_properties(), 529 exp.Qualify: lambda self: self._parse_qualify(), 530 exp.Returning: lambda self: self._parse_returning(), 531 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 532 exp.Table: lambda self: self._parse_table_parts(), 533 exp.TableAlias: lambda self: self._parse_table_alias(), 534 exp.Where: lambda self: self._parse_where(), 535 exp.Window: lambda self: self._parse_named_window(), 536 exp.With: lambda self: self._parse_with(), 537 "JOIN_TYPE": lambda self: 
self._parse_join_parts(), 538 } 539 540 STATEMENT_PARSERS = { 541 TokenType.ALTER: lambda self: self._parse_alter(), 542 TokenType.BEGIN: lambda self: self._parse_transaction(), 543 TokenType.CACHE: lambda self: self._parse_cache(), 544 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 545 TokenType.COMMENT: lambda self: self._parse_comment(), 546 TokenType.CREATE: lambda self: self._parse_create(), 547 TokenType.DELETE: lambda self: self._parse_delete(), 548 TokenType.DESC: lambda self: self._parse_describe(), 549 TokenType.DESCRIBE: lambda self: self._parse_describe(), 550 TokenType.DROP: lambda self: self._parse_drop(), 551 TokenType.INSERT: lambda self: self._parse_insert(), 552 TokenType.KILL: lambda self: self._parse_kill(), 553 TokenType.LOAD: lambda self: self._parse_load(), 554 TokenType.MERGE: lambda self: self._parse_merge(), 555 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 556 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 557 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 558 TokenType.SET: lambda self: self._parse_set(), 559 TokenType.UNCACHE: lambda self: self._parse_uncache(), 560 TokenType.UPDATE: lambda self: self._parse_update(), 561 TokenType.USE: lambda self: self.expression( 562 exp.Use, 563 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 564 and exp.var(self._prev.text), 565 this=self._parse_table(schema=False), 566 ), 567 } 568 569 UNARY_PARSERS = { 570 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 571 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 572 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 573 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 574 } 575 576 PRIMARY_PARSERS = { 577 TokenType.STRING: lambda self, token: self.expression( 578 exp.Literal, this=token.text, 
is_string=True 579 ), 580 TokenType.NUMBER: lambda self, token: self.expression( 581 exp.Literal, this=token.text, is_string=False 582 ), 583 TokenType.STAR: lambda self, _: self.expression( 584 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 585 ), 586 TokenType.NULL: lambda self, _: self.expression(exp.Null), 587 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 588 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 589 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 590 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 591 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 592 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 593 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 594 exp.National, this=token.text 595 ), 596 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 597 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 598 exp.RawString, this=token.text 599 ), 600 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 601 } 602 603 PLACEHOLDER_PARSERS = { 604 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 605 TokenType.PARAMETER: lambda self: self._parse_parameter(), 606 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 607 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 608 else None, 609 } 610 611 RANGE_PARSERS = { 612 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 613 TokenType.GLOB: binary_range_parser(exp.Glob), 614 TokenType.ILIKE: binary_range_parser(exp.ILike), 615 TokenType.IN: lambda self, this: self._parse_in(this), 616 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 617 TokenType.IS: lambda self, this: self._parse_is(this), 
618 TokenType.LIKE: binary_range_parser(exp.Like), 619 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 620 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 621 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 622 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 623 } 624 625 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 626 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 627 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 628 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 629 "CHARACTER SET": lambda self: self._parse_character_set(), 630 "CHECKSUM": lambda self: self._parse_checksum(), 631 "CLUSTER BY": lambda self: self._parse_cluster(), 632 "CLUSTERED": lambda self: self._parse_clustered_by(), 633 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 634 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 635 "COPY": lambda self: self._parse_copy_property(), 636 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 637 "DEFINER": lambda self: self._parse_definer(), 638 "DETERMINISTIC": lambda self: self.expression( 639 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 640 ), 641 "DISTKEY": lambda self: self._parse_distkey(), 642 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 643 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 644 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 645 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 646 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 647 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 648 "FREESPACE": lambda self: self._parse_freespace(), 649 "HEAP": lambda self: self.expression(exp.HeapProperty), 650 "IMMUTABLE": lambda self: 
self.expression( 651 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 652 ), 653 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 654 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 655 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 656 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 657 "LIKE": lambda self: self._parse_create_like(), 658 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 659 "LOCK": lambda self: self._parse_locking(), 660 "LOCKING": lambda self: self._parse_locking(), 661 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 662 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 663 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 664 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 665 "NO": lambda self: self._parse_no_property(), 666 "ON": lambda self: self._parse_on_property(), 667 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 668 "PARTITION BY": lambda self: self._parse_partitioned_by(), 669 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 670 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 671 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 672 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 673 "RETURNS": lambda self: self._parse_returns(), 674 "ROW": lambda self: self._parse_row(), 675 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 676 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 677 "SETTINGS": lambda self: self.expression( 678 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 679 ), 680 "SORTKEY": lambda self: self._parse_sortkey(), 681 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 682 "STABLE": lambda self: self.expression( 683 exp.StabilityProperty, 
this=exp.Literal.string("STABLE") 684 ), 685 "STORED": lambda self: self._parse_stored(), 686 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 687 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 688 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 689 "TO": lambda self: self._parse_to_table(), 690 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 691 "TTL": lambda self: self._parse_ttl(), 692 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 693 "VOLATILE": lambda self: self._parse_volatile_property(), 694 "WITH": lambda self: self._parse_with_property(), 695 } 696 697 CONSTRAINT_PARSERS = { 698 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 699 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 700 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 701 "CHARACTER SET": lambda self: self.expression( 702 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 703 ), 704 "CHECK": lambda self: self.expression( 705 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 706 ), 707 "COLLATE": lambda self: self.expression( 708 exp.CollateColumnConstraint, this=self._parse_var() 709 ), 710 "COMMENT": lambda self: self.expression( 711 exp.CommentColumnConstraint, this=self._parse_string() 712 ), 713 "COMPRESS": lambda self: self._parse_compress(), 714 "CLUSTERED": lambda self: self.expression( 715 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 716 ), 717 "NONCLUSTERED": lambda self: self.expression( 718 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 719 ), 720 "DEFAULT": lambda self: self.expression( 721 exp.DefaultColumnConstraint, this=self._parse_bitwise() 722 ), 723 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 724 "FOREIGN KEY": lambda self: 
        self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> becomes a column constraint; a bare ON <id> becomes a property.
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # Keyword following ALTER TABLE <name> -> handler for that action.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema without a CONSTRAINT <name> prefix.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Keyword -> parser for function-like constructs that are not followed by parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Token types that cannot act as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Function name -> dedicated parser, for functions with non-standard argument syntax.
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Token type -> parser returning an ("arg name", expression) pair used to
    # collect the modifiers (WHERE, GROUP BY, LIMIT, ...) that follow a query.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # Keyword following SET -> handler for that form of SET statement.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Populated by dialects; used (with SHOW_TRIE) to parse SHOW statements.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Expression types that can carry query modifiers.
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords accepted after INSERT OR ...
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False
"=") for assignments 894 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 895 896 # Whether the TRIM function expects the characters to trim as its first argument 897 TRIM_PATTERN_FIRST = False 898 899 __slots__ = ( 900 "error_level", 901 "error_message_context", 902 "max_errors", 903 "sql", 904 "errors", 905 "_tokens", 906 "_index", 907 "_curr", 908 "_next", 909 "_prev", 910 "_prev_comments", 911 "_tokenizer", 912 ) 913 914 # Autofilled 915 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 916 INDEX_OFFSET: int = 0 917 UNNEST_COLUMN_ONLY: bool = False 918 ALIAS_POST_TABLESAMPLE: bool = False 919 STRICT_STRING_CONCAT = False 920 SUPPORTS_USER_DEFINED_TYPES = True 921 NORMALIZE_FUNCTIONS = "upper" 922 NULL_ORDERING: str = "nulls_are_small" 923 SHOW_TRIE: t.Dict = {} 924 SET_TRIE: t.Dict = {} 925 FORMAT_MAPPING: t.Dict[str, str] = {} 926 FORMAT_TRIE: t.Dict = {} 927 TIME_MAPPING: t.Dict[str, str] = {} 928 TIME_TRIE: t.Dict = {} 929 930 def __init__( 931 self, 932 error_level: t.Optional[ErrorLevel] = None, 933 error_message_context: int = 100, 934 max_errors: int = 3, 935 ): 936 self.error_level = error_level or ErrorLevel.IMMEDIATE 937 self.error_message_context = error_message_context 938 self.max_errors = max_errors 939 self._tokenizer = self.TOKENIZER_CLASS() 940 self.reset() 941 942 def reset(self): 943 self.sql = "" 944 self.errors = [] 945 self._tokens = [] 946 self._index = 0 947 self._curr = None 948 self._next = None 949 self._prev = None 950 self._prev_comments = None 951 952 def parse( 953 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 954 ) -> t.List[t.Optional[exp.Expression]]: 955 """ 956 Parses a list of tokens and returns a list of syntax trees, one tree 957 per parsed SQL statement. 958 959 Args: 960 raw_tokens: The list of tokens. 961 sql: The original SQL string, used to produce helpful debug messages. 962 963 Returns: 964 The list of the produced syntax trees. 
965 """ 966 return self._parse( 967 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 968 ) 969 970 def parse_into( 971 self, 972 expression_types: exp.IntoType, 973 raw_tokens: t.List[Token], 974 sql: t.Optional[str] = None, 975 ) -> t.List[t.Optional[exp.Expression]]: 976 """ 977 Parses a list of tokens into a given Expression type. If a collection of Expression 978 types is given instead, this method will try to parse the token list into each one 979 of them, stopping at the first for which the parsing succeeds. 980 981 Args: 982 expression_types: The expression type(s) to try and parse the token list into. 983 raw_tokens: The list of tokens. 984 sql: The original SQL string, used to produce helpful debug messages. 985 986 Returns: 987 The target Expression. 988 """ 989 errors = [] 990 for expression_type in ensure_list(expression_types): 991 parser = self.EXPRESSION_PARSERS.get(expression_type) 992 if not parser: 993 raise TypeError(f"No parser registered for {expression_type}") 994 995 try: 996 return self._parse(parser, raw_tokens, sql) 997 except ParseError as e: 998 e.errors[0]["into_expression"] = expression_type 999 errors.append(e) 1000 1001 raise ParseError( 1002 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1003 errors=merge_errors(errors), 1004 ) from errors[-1] 1005 1006 def _parse( 1007 self, 1008 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1009 raw_tokens: t.List[Token], 1010 sql: t.Optional[str] = None, 1011 ) -> t.List[t.Optional[exp.Expression]]: 1012 self.reset() 1013 self.sql = sql or "" 1014 1015 total = len(raw_tokens) 1016 chunks: t.List[t.List[Token]] = [[]] 1017 1018 for i, token in enumerate(raw_tokens): 1019 if token.token_type == TokenType.SEMICOLON: 1020 if i < total - 1: 1021 chunks.append([]) 1022 else: 1023 chunks[-1].append(token) 1024 1025 expressions = [] 1026 1027 for tokens in chunks: 1028 self._index = -1 1029 self._tokens = tokens 1030 self._advance() 1031 
1032 expressions.append(parse_method(self)) 1033 1034 if self._index < len(self._tokens): 1035 self.raise_error("Invalid expression / Unexpected token") 1036 1037 self.check_errors() 1038 1039 return expressions 1040 1041 def check_errors(self) -> None: 1042 """Logs or raises any found errors, depending on the chosen error level setting.""" 1043 if self.error_level == ErrorLevel.WARN: 1044 for error in self.errors: 1045 logger.error(str(error)) 1046 elif self.error_level == ErrorLevel.RAISE and self.errors: 1047 raise ParseError( 1048 concat_messages(self.errors, self.max_errors), 1049 errors=merge_errors(self.errors), 1050 ) 1051 1052 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1053 """ 1054 Appends an error in the list of recorded errors or raises it, depending on the chosen 1055 error level setting. 1056 """ 1057 token = token or self._curr or self._prev or Token.string("") 1058 start = token.start 1059 end = token.end + 1 1060 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1061 highlight = self.sql[start:end] 1062 end_context = self.sql[end : end + self.error_message_context] 1063 1064 error = ParseError.new( 1065 f"{message}. Line {token.line}, Col: {token.col}.\n" 1066 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1067 description=message, 1068 line=token.line, 1069 col=token.col, 1070 start_context=start_context, 1071 highlight=highlight, 1072 end_context=end_context, 1073 ) 1074 1075 if self.error_level == ErrorLevel.IMMEDIATE: 1076 raise error 1077 1078 self.errors.append(error) 1079 1080 def expression( 1081 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1082 ) -> E: 1083 """ 1084 Creates a new, validated Expression. 1085 1086 Args: 1087 exp_class: The expression class to instantiate. 1088 comments: An optional list of comments to attach to the expression. 1089 kwargs: The arguments to set for the expression along with their respective values. 
1090 1091 Returns: 1092 The target expression. 1093 """ 1094 instance = exp_class(**kwargs) 1095 instance.add_comments(comments) if comments else self._add_comments(instance) 1096 return self.validate_expression(instance) 1097 1098 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1099 if expression and self._prev_comments: 1100 expression.add_comments(self._prev_comments) 1101 self._prev_comments = None 1102 1103 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1104 """ 1105 Validates an Expression, making sure that all its mandatory arguments are set. 1106 1107 Args: 1108 expression: The expression to validate. 1109 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1110 1111 Returns: 1112 The validated expression. 1113 """ 1114 if self.error_level != ErrorLevel.IGNORE: 1115 for error_message in expression.error_messages(args): 1116 self.raise_error(error_message) 1117 1118 return expression 1119 1120 def _find_sql(self, start: Token, end: Token) -> str: 1121 return self.sql[start.start : end.end + 1] 1122 1123 def _advance(self, times: int = 1) -> None: 1124 self._index += times 1125 self._curr = seq_get(self._tokens, self._index) 1126 self._next = seq_get(self._tokens, self._index + 1) 1127 1128 if self._index > 0: 1129 self._prev = self._tokens[self._index - 1] 1130 self._prev_comments = self._prev.comments 1131 else: 1132 self._prev = None 1133 self._prev_comments = None 1134 1135 def _retreat(self, index: int) -> None: 1136 if index != self._index: 1137 self._advance(index - self._index) 1138 1139 def _parse_command(self) -> exp.Command: 1140 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1141 1142 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1143 start = self._prev 1144 exists = self._parse_exists() if allow_exists else None 1145 1146 self._match(TokenType.ON) 1147 1148 kind = 
self._match_set(self.CREATABLES) and self._prev 1149 if not kind: 1150 return self._parse_as_command(start) 1151 1152 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1153 this = self._parse_user_defined_function(kind=kind.token_type) 1154 elif kind.token_type == TokenType.TABLE: 1155 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1156 elif kind.token_type == TokenType.COLUMN: 1157 this = self._parse_column() 1158 else: 1159 this = self._parse_id_var() 1160 1161 self._match(TokenType.IS) 1162 1163 return self.expression( 1164 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1165 ) 1166 1167 def _parse_to_table( 1168 self, 1169 ) -> exp.ToTableProperty: 1170 table = self._parse_table_parts(schema=True) 1171 return self.expression(exp.ToTableProperty, this=table) 1172 1173 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1174 def _parse_ttl(self) -> exp.Expression: 1175 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1176 this = self._parse_bitwise() 1177 1178 if self._match_text_seq("DELETE"): 1179 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1180 if self._match_text_seq("RECOMPRESS"): 1181 return self.expression( 1182 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1183 ) 1184 if self._match_text_seq("TO", "DISK"): 1185 return self.expression( 1186 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1187 ) 1188 if self._match_text_seq("TO", "VOLUME"): 1189 return self.expression( 1190 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1191 ) 1192 1193 return this 1194 1195 expressions = self._parse_csv(_parse_ttl_action) 1196 where = self._parse_where() 1197 group = self._parse_group() 1198 1199 aggregates = None 1200 if group and self._match(TokenType.SET): 1201 aggregates = self._parse_csv(self._parse_set_item) 1202 1203 return self.expression( 1204 
    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement: a registered statement, a command, or an expression."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses DROP [TEMPORARY] [MATERIALIZED] <kind> ...; falls back to a raw command."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only when the full sequence matched.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE statement (table, view, index, UDF, ...), incl. clone syntax."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # CREATE TABLE FUNCTION: skip TABLE so FUNCTION is treated as the creatable.
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at the various syntactic locations.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords that may precede the property name.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single table/DDL property; returns None (cursor restored) on no match."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a key = value property: rewind and report no match.
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )
self.expression( 1443 exp.Property, 1444 this=key.to_dot() if isinstance(key, exp.Column) else key, 1445 value=self._parse_column() or self._parse_var(any_token=True), 1446 ) 1447 1448 def _parse_stored(self) -> exp.FileFormatProperty: 1449 self._match(TokenType.ALIAS) 1450 1451 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1452 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1453 1454 return self.expression( 1455 exp.FileFormatProperty, 1456 this=self.expression( 1457 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1458 ) 1459 if input_format or output_format 1460 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1461 ) 1462 1463 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1464 self._match(TokenType.EQ) 1465 self._match(TokenType.ALIAS) 1466 return self.expression(exp_class, this=self._parse_field()) 1467 1468 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1469 properties = [] 1470 while True: 1471 if before: 1472 prop = self._parse_property_before() 1473 else: 1474 prop = self._parse_property() 1475 1476 if not prop: 1477 break 1478 for p in ensure_list(prop): 1479 properties.append(p) 1480 1481 if properties: 1482 return self.expression(exp.Properties, expressions=properties) 1483 1484 return None 1485 1486 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1487 return self.expression( 1488 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1489 ) 1490 1491 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1492 if self._index >= 2: 1493 pre_volatile_token = self._tokens[self._index - 2] 1494 else: 1495 pre_volatile_token = None 1496 1497 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1498 return exp.VolatileProperty() 1499 1500 return 
self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1501 1502 def _parse_with_property( 1503 self, 1504 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1505 if self._match(TokenType.L_PAREN, advance=False): 1506 return self._parse_wrapped_csv(self._parse_property) 1507 1508 if self._match_text_seq("JOURNAL"): 1509 return self._parse_withjournaltable() 1510 1511 if self._match_text_seq("DATA"): 1512 return self._parse_withdata(no=False) 1513 elif self._match_text_seq("NO", "DATA"): 1514 return self._parse_withdata(no=True) 1515 1516 if not self._next: 1517 return None 1518 1519 return self._parse_withisolatedloading() 1520 1521 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1522 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1523 self._match(TokenType.EQ) 1524 1525 user = self._parse_id_var() 1526 self._match(TokenType.PARAMETER) 1527 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1528 1529 if not user or not host: 1530 return None 1531 1532 return exp.DefinerProperty(this=f"{user}@{host}") 1533 1534 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1535 self._match(TokenType.TABLE) 1536 self._match(TokenType.EQ) 1537 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1538 1539 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1540 return self.expression(exp.LogProperty, no=no) 1541 1542 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1543 return self.expression(exp.JournalProperty, **kwargs) 1544 1545 def _parse_checksum(self) -> exp.ChecksumProperty: 1546 self._match(TokenType.EQ) 1547 1548 on = None 1549 if self._match(TokenType.ON): 1550 on = True 1551 elif self._match_text_seq("OFF"): 1552 on = False 1553 1554 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1555 1556 def _parse_cluster(self) -> exp.Cluster: 1557 return self.expression(exp.Cluster, 
expressions=self._parse_csv(self._parse_ordered)) 1558 1559 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1560 self._match_text_seq("BY") 1561 1562 self._match_l_paren() 1563 expressions = self._parse_csv(self._parse_column) 1564 self._match_r_paren() 1565 1566 if self._match_text_seq("SORTED", "BY"): 1567 self._match_l_paren() 1568 sorted_by = self._parse_csv(self._parse_ordered) 1569 self._match_r_paren() 1570 else: 1571 sorted_by = None 1572 1573 self._match(TokenType.INTO) 1574 buckets = self._parse_number() 1575 self._match_text_seq("BUCKETS") 1576 1577 return self.expression( 1578 exp.ClusteredByProperty, 1579 expressions=expressions, 1580 sorted_by=sorted_by, 1581 buckets=buckets, 1582 ) 1583 1584 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1585 if not self._match_text_seq("GRANTS"): 1586 self._retreat(self._index - 1) 1587 return None 1588 1589 return self.expression(exp.CopyGrantsProperty) 1590 1591 def _parse_freespace(self) -> exp.FreespaceProperty: 1592 self._match(TokenType.EQ) 1593 return self.expression( 1594 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1595 ) 1596 1597 def _parse_mergeblockratio( 1598 self, no: bool = False, default: bool = False 1599 ) -> exp.MergeBlockRatioProperty: 1600 if self._match(TokenType.EQ): 1601 return self.expression( 1602 exp.MergeBlockRatioProperty, 1603 this=self._parse_number(), 1604 percent=self._match(TokenType.PERCENT), 1605 ) 1606 1607 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1608 1609 def _parse_datablocksize( 1610 self, 1611 default: t.Optional[bool] = None, 1612 minimum: t.Optional[bool] = None, 1613 maximum: t.Optional[bool] = None, 1614 ) -> exp.DataBlocksizeProperty: 1615 self._match(TokenType.EQ) 1616 size = self._parse_number() 1617 1618 units = None 1619 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1620 units = self._prev.text 1621 1622 return self.expression( 1623 
    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parses BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parses WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parses a LOCKING property: kind, optional target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        # Returns an empty list when there is no PARTITION BY clause.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # Parses the optional AND [NO] STATISTICS suffix of WITH [NO] DATA.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parses LIKE <table> [INCLUDING|EXCLUDING <option>]*."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parses a RETURNS clause: a scalar type, TABLE<...>, or a TABLE schema."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        """Parses an INSERT statement, incl. OVERWRITE/IGNORE/DIRECTORY/OR <alternative>."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' ...
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <expr> [, ...] [INTO <target>]; None when absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # The ROW token was consumed by the caller; only FORMAT remains.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE '<class>' [...] or ROW FORMAT DELIMITED [...]."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional and order-sensitive.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA ...; anything else after LOAD falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after WHERE depending on dialect.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement body (the UPDATE keyword was already consumed)."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        # UNCACHE TABLE [IF EXISTS] <table>
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            # Only a single key/value pair is parsed here.
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row: either a parenthesized tuple or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: optional CTEs, then SELECT / parenthesized
        subquery / VALUES / bare leading FROM, plus any set operations."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            # Bare "FROM x" (duckdb) becomes SELECT * FROM x.
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse WITH [RECURSIVE] <cte> [, <cte> ...]; None when there is no WITH."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs after a comma.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: <alias> [AS] (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <name> [(<col> [, ...])]; None when neither name nor columns parse."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Back out of the paren entirely if no column list was actually there.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and clause modifiers (WHERE, GROUP BY, LIMIT, ...)
        to a modifiable expression; non-modifiables are returned untouched."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT <offset>, <count> form carries the offset inline;
                            # hoist it into its own Offset node.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment block ending in */."""
        if self._match(TokenType.HINT):
            hints = []
            # Keep collecting comma-separated hint function lists until none parse.
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            # NOTE(review): the f-prefixes below have no placeholders (flake8 F541);
            # plain string literals would be equivalent.
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured as raw SQL text between balanced parens.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / OUTER APPLY / CROSS APPLY and the expression they wrap."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: fall back to UNNEST, a function call or an identifier,
            # possibly followed by a dotted chain.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        # (method, side, kind) e.g. (HASH, LEFT, OUTER); each part is optional.
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join clause (including comma joins and APPLY); None when absent."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The join-part tokens weren't actually a join; rewind.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Support nested joins whose ON/USING belongs to the outer join.
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an index expression with an optional operator class suffix."""
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        opclass = self._parse_var(any_token=True)
        if opclass:
            return self.expression(exp.Opclass, this=this, expression=opclass)

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; when `index` is given the name was parsed
        by the caller and only ON <table> remains before the column list."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH(...) table hints or MySQL index hints; None when absent."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        # One dotted component of a table name; functions are disallowed in
        # schema position.
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or
            self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name: [catalog.][db.]table[.more...]."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift left: previous parts become db/catalog as more dots appear.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery or a plain
        table reference, with optional alias/hints/pivots/sample/joins."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            # In schema position (e.g. DDL targets) the name may be followed
            # by a column definition list.
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Dialects disagree on whether TABLESAMPLE precedes or follows the alias;
        # exactly one of these two branches runs, so table_sample is always bound.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal/versioned table clauses: FOR TIMESTAMP/VERSION AS OF,
        FROM ... TO, BETWEEN ... AND, CONTAINED IN (...), or ALL."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] name]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.UNNEST_COLUMN_ONLY:
                # e.g. BigQuery-style: the alias names the single column, not the table.
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The last column alias names the ordinality/offset column.
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse VALUES (...) [, ...] possibly wrapped in parentheses (derived table)."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse TABLESAMPLE (...) or, as a query modifier, USING SAMPLE ...."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary()
            )

        if self._match_text_seq("BUCKET"):
2758 bucket_numerator = self._parse_number() 2759 self._match_text_seq("OUT", "OF") 2760 bucket_denominator = bucket_denominator = self._parse_number() 2761 self._match(TokenType.ON) 2762 bucket_field = self._parse_field() 2763 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2764 percent = num 2765 elif self._match(TokenType.ROWS): 2766 rows = num 2767 elif num: 2768 size = num 2769 2770 if matched_l_paren: 2771 self._match_r_paren() 2772 2773 if self._match(TokenType.L_PAREN): 2774 method = self._parse_var() 2775 seed = self._match(TokenType.COMMA) and self._parse_number() 2776 self._match_r_paren() 2777 elif self._match_texts(("SEED", "REPEATABLE")): 2778 seed = self._parse_wrapped(self._parse_number) 2779 2780 return self.expression( 2781 exp.TableSample, 2782 expressions=expressions, 2783 method=method, 2784 bucket_numerator=bucket_numerator, 2785 bucket_denominator=bucket_denominator, 2786 bucket_field=bucket_field, 2787 percent=percent, 2788 rows=rows, 2789 size=size, 2790 seed=seed, 2791 kind=kind, 2792 ) 2793 2794 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2795 return list(iter(self._parse_pivot, None)) or None 2796 2797 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2798 return list(iter(self._parse_join, None)) or None 2799 2800 # https://duckdb.org/docs/sql/statements/pivot 2801 def _parse_simplified_pivot(self) -> exp.Pivot: 2802 def _parse_on() -> t.Optional[exp.Expression]: 2803 this = self._parse_bitwise() 2804 return self._parse_in(this) if self._match(TokenType.IN) else this 2805 2806 this = self._parse_table() 2807 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2808 using = self._match(TokenType.USING) and self._parse_csv( 2809 lambda: self._parse_alias(self._parse_function()) 2810 ) 2811 group = self._parse_group() 2812 return self.expression( 2813 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2814 ) 2815 2816 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2817 
        # Remember where we started so we can backtrack if this turns out not
        # to be a (UN)PIVOT clause after all.
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            # Neither PIVOT nor UNPIVOT: not our clause.
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT keyword without a parenthesized body — undo the match.
            self._retreat(index)
            return None

        # UNPIVOT takes plain columns; PIVOT takes (optionally aliased) aggregations.
        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        # The IN (...) list; aliases are allowed for the pivoted values.
        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # Only attach an alias if another (UN)PIVOT clause doesn't follow immediately.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names produced by the pivot, combining
            # each aggregation alias with each pivoted field value.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    # Dialect setting controls whether the aggregation alias is a
                    # prefix or a suffix of the generated column name.
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; None when the WHERE token is absent (unless skipped)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including GROUPING SETS, (WITH) ROLLUP/CUBE and TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # Accumulates the various grouping element kinds keyed by arg name.
        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        # Keep looping as long as we keep consuming grouping modifiers; plain
        # expressions alone do not continue the loop.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE take no argument list; the bare keyword
            # forms take a wrapped column list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...); None when the keyword is absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; None when the HAVING token is absent (unless skipped)."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause; None when the QUALIFY token is absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY (in either order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # Temporarily register a parser for PRIOR so it's only recognized inside
        # the CONNECT BY condition. NOTE(review): this mutates what appears to be
        # a class-level mapping and restores it via pop() below — not re-entrant;
        # confirm single-threaded parsing is assumed.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        # START WITH may also appear after CONNECT BY.
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; returns `this` unchanged when the token is absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause (e.g. SORT BY/CLUSTER BY) introduced by `token`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        """Parse one ordering term: expr [ASC|DESC] [NULLS FIRST|LAST]."""
        this = parse_method() if parse_method else self._parse_conjunction()

        # Matching ASC consumes the token; `asc and False` is always falsy, so
        # `desc` is True only on an explicit DESC.
        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When nulls ordering isn't explicit, infer it from the dialect's
        # NULL_ORDERING policy and the sort direction.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or FETCH; returns `this` unchanged when neither matches."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP may optionally parenthesize its count: TOP (n).
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            # LIMIT x, y — the first term is actually the offset.
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            # FETCH {FIRST|NEXT} [count] [PERCENT] {ROW|ROWS} {ONLY|WITH TIES}
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS]; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            # Optional OF <tables> restriction.
            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # NOWAIT -> True, WAIT n -> the parsed expression, SKIP LOCKED -> False.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT; recurses so chained operations nest rightwards."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-level expressions (lowest scalar precedence)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators (<, >, <=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: [NOT] BETWEEN/IN/LIKE..., ISNULL/NOTNULL, IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The range parser declined; keep the operand as-is.
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            # NOTNULL desugars to NOT (x IS NULL).
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE."""
        # index points at the IS token itself so we can retreat past it on failure.
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate we understand — give the IS token back.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the tail of an IN predicate: UNNEST(...), (list-or-subquery), or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery becomes the `query` arg; anything else is a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse the tail of BETWEEN: low AND high."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, canonicalizing to INTERVAL '<n>' <unit> form."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            # INTERVAL keyword without a value — backtrack entirely.
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                # Split '5 day' into value '5' and unit var `day`.
                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, ?? coalescing, and << / >> shift pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                # `a ?? b` is parsed as COALESCE(a, b).
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (TERM token set)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (FACTOR token set)."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary operators, falling through to typed/column expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast-style `<type> <literal>` pair, or a plain column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # `<type> '<literal>'` — either a dialect-specific literal parser
                # or a generic CAST.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name followed by a non-literal was a misparse;
                # re-parse the whole thing as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse a data type parameter (size plus optional trailing var)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested/struct/enum params, UDTs, timezone and
        INTERVAL qualifiers, UNSIGNED, and trailing [] array suffixes. Backtracks and
        returns None if the tokens don't form a type."""
        index = self._index

        # Teradata-style SYSUDTLIB. prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Maybe the type is spelled as a plain identifier (quoted type or UDT).
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    # Collect a possibly dotted UDT name.
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            # Parenthesized type parameters; their shape depends on the type kind.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # `NAME(...)` could also have been a function call — remember that.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            # WITH/WITHOUT TIME ZONE qualifiers disambiguate from function calls.
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # INTERVAL <unit> TO <unit> span type.
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # If a string literal follows, `NAME(...)` was a type cast context;
            # otherwise treat the whole thing as not-a-type and backtrack.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] suffixes wrap the type in ARRAY, one level per pair.
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one struct member: `name[: ]type` as a column definition."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference (field plus dot/bracket/cast operators)."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        # Fold trailing column operators (., ::, ->, brackets, ...) left to right.
        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast — the right side must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Operators with a parser take the next token as a literal operand.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the existing column parts up one level:
                # column -> table, table -> db, db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, adjacent-string concat, `.N` number,
        or a parenthesized expression/tuple/subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> CONCAT('a', 'b')).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # `.5` style literal without a leading zero.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # More than one expression in parens is a tuple.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: primary literal, function call, or identifier — in that order."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call (no-paren, special-cased, known, or anonymous),
        followed by an optional OVER window."""
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        # No-paren functions with a dedicated parser (e.g. CURRENT_DATE-style).
        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # Without a following `(` only no-paren builtins qualify.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # EXISTS(SELECT ...) style predicates wrap a subquery.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Preserve the original casing of the function name.
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter in a function definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a UDF signature: dotted name plus optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda ((a, b) -> expr or a -> expr); otherwise a DISTINCT list
        or a select/expression argument."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda — rewind and parse as a regular argument expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs/constraints), unless a nested
        SELECT parses at this position."""
        index = self._index

        if not self.errors:
            # Tentatively try a nested SELECT; on failure clear the recorded
            # errors and rewind so the schema parse below starts clean.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a field definition (field name plus column-def tail)."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition tail: type, FOR ORDINALITY, computed-column
        AS-expression, and any number of column constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Untyped `name AS expr` — a computed column.
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Nothing column-def-like followed; return the bare identifier.
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either a wrapped list or one value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
    def _parse_generated_as_identity(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [( ... )] options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Sequence options inside the parentheses, each optional.
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expr) — a computed expression, not identity options.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the constraint following a NOT keyword (NULL / CASESPECIFIC / FOR REPLICATION)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint, dispatching on CONSTRAINT_PARSERS."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; unnamed constraints use the schema-level set."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint may carry several constraint bodies / function calls.
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint without a CONSTRAINT name, dispatching on the matched keyword."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(columns)] [USING index_type]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON DELETE/UPDATE actions, DEFERRABLE, etc.)
        as plain strings, stopping at the first unrecognized token."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token right after ON (e.g. DELETE / UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options
action = "CASCADE" 3910 elif self._match_text_seq("RESTRICT"): 3911 action = "RESTRICT" 3912 elif self._match_pair(TokenType.SET, TokenType.NULL): 3913 action = "SET NULL" 3914 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3915 action = "SET DEFAULT" 3916 else: 3917 self.raise_error("Invalid key constraint") 3918 3919 options.append(f"ON {on} {action}") 3920 elif self._match_text_seq("NOT", "ENFORCED"): 3921 options.append("NOT ENFORCED") 3922 elif self._match_text_seq("DEFERRABLE"): 3923 options.append("DEFERRABLE") 3924 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3925 options.append("INITIALLY DEFERRED") 3926 elif self._match_text_seq("NORELY"): 3927 options.append("NORELY") 3928 elif self._match_text_seq("MATCH", "FULL"): 3929 options.append("MATCH FULL") 3930 else: 3931 break 3932 3933 return options 3934 3935 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3936 if match and not self._match(TokenType.REFERENCES): 3937 return None 3938 3939 expressions = None 3940 this = self._parse_table(schema=True) 3941 options = self._parse_key_constraint_options() 3942 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3943 3944 def _parse_foreign_key(self) -> exp.ForeignKey: 3945 expressions = self._parse_wrapped_id_vars() 3946 reference = self._parse_references() 3947 options = {} 3948 3949 while self._match(TokenType.ON): 3950 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3951 self.raise_error("Expected DELETE or UPDATE") 3952 3953 kind = self._prev.text.lower() 3954 3955 if self._match_text_seq("NO", "ACTION"): 3956 action = "NO ACTION" 3957 elif self._match(TokenType.SET): 3958 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3959 action = "SET " + self._prev.text.upper() 3960 else: 3961 self._advance() 3962 action = self._prev.text.upper() 3963 3964 options[kind] = action 3965 3966 return self.expression( 3967 exp.ForeignKey, expressions=expressions, 
reference=reference, **options # type: ignore 3968 ) 3969 3970 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 3971 return self._parse_field() 3972 3973 def _parse_primary_key( 3974 self, wrapped_optional: bool = False, in_props: bool = False 3975 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3976 desc = ( 3977 self._match_set((TokenType.ASC, TokenType.DESC)) 3978 and self._prev.token_type == TokenType.DESC 3979 ) 3980 3981 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3982 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3983 3984 expressions = self._parse_wrapped_csv( 3985 self._parse_primary_key_part, optional=wrapped_optional 3986 ) 3987 options = self._parse_key_constraint_options() 3988 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3989 3990 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3991 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3992 return this 3993 3994 bracket_kind = self._prev.token_type 3995 3996 if self._match(TokenType.COLON): 3997 expressions: t.List[exp.Expression] = [ 3998 self.expression(exp.Slice, expression=self._parse_conjunction()) 3999 ] 4000 else: 4001 expressions = self._parse_csv( 4002 lambda: self._parse_slice( 4003 self._parse_alias(self._parse_conjunction(), explicit=True) 4004 ) 4005 ) 4006 4007 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4008 if bracket_kind == TokenType.L_BRACE: 4009 this = self.expression(exp.Struct, expressions=expressions) 4010 elif not this or this.name.upper() == "ARRAY": 4011 this = self.expression(exp.Array, expressions=expressions) 4012 else: 4013 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4014 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4015 4016 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4017 self.raise_error("Expected 
]") 4018 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4019 self.raise_error("Expected }") 4020 4021 self._add_comments(this) 4022 return self._parse_bracket(this) 4023 4024 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4025 if self._match(TokenType.COLON): 4026 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4027 return this 4028 4029 def _parse_case(self) -> t.Optional[exp.Expression]: 4030 ifs = [] 4031 default = None 4032 4033 comments = self._prev_comments 4034 expression = self._parse_conjunction() 4035 4036 while self._match(TokenType.WHEN): 4037 this = self._parse_conjunction() 4038 self._match(TokenType.THEN) 4039 then = self._parse_conjunction() 4040 ifs.append(self.expression(exp.If, this=this, true=then)) 4041 4042 if self._match(TokenType.ELSE): 4043 default = self._parse_conjunction() 4044 4045 if not self._match(TokenType.END): 4046 self.raise_error("Expected END after CASE", self._prev) 4047 4048 return self._parse_window( 4049 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4050 ) 4051 4052 def _parse_if(self) -> t.Optional[exp.Expression]: 4053 if self._match(TokenType.L_PAREN): 4054 args = self._parse_csv(self._parse_conjunction) 4055 this = self.validate_expression(exp.If.from_arg_list(args), args) 4056 self._match_r_paren() 4057 else: 4058 index = self._index - 1 4059 condition = self._parse_conjunction() 4060 4061 if not condition: 4062 self._retreat(index) 4063 return None 4064 4065 self._match(TokenType.THEN) 4066 true = self._parse_conjunction() 4067 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4068 self._match(TokenType.END) 4069 this = self.expression(exp.If, this=condition, true=true, false=false) 4070 4071 return self._parse_window(this) 4072 4073 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4074 if not self._match_text_seq("VALUE", "FOR"): 
4075 self._retreat(self._index - 1) 4076 return None 4077 4078 return self.expression( 4079 exp.NextValueFor, 4080 this=self._parse_column(), 4081 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4082 ) 4083 4084 def _parse_extract(self) -> exp.Extract: 4085 this = self._parse_function() or self._parse_var() or self._parse_type() 4086 4087 if self._match(TokenType.FROM): 4088 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4089 4090 if not self._match(TokenType.COMMA): 4091 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4092 4093 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4094 4095 def _parse_any_value(self) -> exp.AnyValue: 4096 this = self._parse_lambda() 4097 is_max = None 4098 having = None 4099 4100 if self._match(TokenType.HAVING): 4101 self._match_texts(("MAX", "MIN")) 4102 is_max = self._prev.text == "MAX" 4103 having = self._parse_column() 4104 4105 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4106 4107 def _parse_cast(self, strict: bool) -> exp.Expression: 4108 this = self._parse_conjunction() 4109 4110 if not self._match(TokenType.ALIAS): 4111 if self._match(TokenType.COMMA): 4112 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4113 4114 self.raise_error("Expected AS after CAST") 4115 4116 fmt = None 4117 to = self._parse_types() 4118 4119 if not to: 4120 self.raise_error("Expected TYPE after CAST") 4121 elif isinstance(to, exp.Identifier): 4122 to = exp.DataType.build(to.name, udt=True) 4123 elif to.this == exp.DataType.Type.CHAR: 4124 if self._match(TokenType.CHARACTER_SET): 4125 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4126 elif self._match(TokenType.FORMAT): 4127 fmt_string = self._parse_string() 4128 fmt = self._parse_at_time_zone(fmt_string) 4129 4130 if to.this in exp.DataType.TEMPORAL_TYPES: 4131 this = self.expression( 4132 
    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT(...) arguments into Concat/SafeConcat (dialect-dependent)."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT_WS(delimiter, value, ...)."""
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # The delimiter is exempt from NULL-to-string coercion.
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT variants into a GroupConcat node."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired value is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: equal, or both sides NULL (DECODE treats NULL = NULL as a match).
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
4226 """ 4227 args = self._parse_csv(self._parse_conjunction) 4228 4229 if len(args) < 3: 4230 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4231 4232 expression, *expressions = args 4233 if not expression: 4234 return None 4235 4236 ifs = [] 4237 for search, result in zip(expressions[::2], expressions[1::2]): 4238 if not search or not result: 4239 return None 4240 4241 if isinstance(search, exp.Literal): 4242 ifs.append( 4243 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4244 ) 4245 elif isinstance(search, exp.Null): 4246 ifs.append( 4247 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4248 ) 4249 else: 4250 cond = exp.or_( 4251 exp.EQ(this=expression.copy(), expression=search), 4252 exp.and_( 4253 exp.Is(this=expression.copy(), expression=exp.Null()), 4254 exp.Is(this=search.copy(), expression=exp.Null()), 4255 copy=False, 4256 ), 4257 copy=False, 4258 ) 4259 ifs.append(exp.If(this=cond, true=result)) 4260 4261 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4262 4263 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4264 self._match_text_seq("KEY") 4265 key = self._parse_column() 4266 self._match_set((TokenType.COLON, TokenType.COMMA)) 4267 self._match_text_seq("VALUE") 4268 value = self._parse_bitwise() 4269 4270 if not key and not value: 4271 return None 4272 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4273 4274 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4275 if not this or not self._match_text_seq("FORMAT", "JSON"): 4276 return this 4277 4278 return self.expression(exp.FormatJson, this=this) 4279 4280 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4281 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4282 for value in values: 4283 if self._match_text_seq(value, "ON", on): 4284 return f"{value} ON {on}" 4285 4286 return None 4287 4288 def _parse_json_object(self) -> exp.JSONObject: 4289 star = self._parse_star() 4290 expressions = ( 4291 [star] 4292 if star 4293 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4294 ) 4295 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4296 4297 unique_keys = None 4298 if self._match_text_seq("WITH", "UNIQUE"): 4299 unique_keys = True 4300 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4301 unique_keys = False 4302 4303 self._match_text_seq("KEYS") 4304 4305 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4306 self._parse_type() 4307 ) 4308 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4309 4310 return self.expression( 4311 exp.JSONObject, 4312 expressions=expressions, 4313 null_handling=null_handling, 4314 unique_keys=unique_keys, 4315 return_type=return_type, 4316 encoding=encoding, 4317 ) 4318 4319 def _parse_logarithm(self) -> exp.Func: 4320 # Default argument order is base, expression 4321 args = self._parse_csv(self._parse_range) 4322 4323 if len(args) > 1: 4324 if not self.LOG_BASE_FIRST: 4325 args.reverse() 4326 return exp.Log.from_arg_list(args) 4327 4328 return self.expression( 4329 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4330 ) 4331 4332 def _parse_match_against(self) -> exp.MatchAgainst: 4333 expressions = self._parse_csv(self._parse_column) 4334 4335 self._match_text_seq(")", "AGAINST", "(") 4336 4337 this = self._parse_string() 4338 4339 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4340 modifier = "IN NATURAL LANGUAGE MODE" 4341 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4342 modifier = f"{modifier} WITH QUERY EXPANSION" 4343 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4344 modifier = "IN BOOLEAN MODE" 4345 elif 
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One `name type [path] [AS JSON]` entry of the WITH clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE arguments, normalizing both argument orders."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint function (e.g. BROADCAST(t1, t2))."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): the operands arrive in reverse order.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause: a comma-separated list of named windows."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap *this* in IgnoreNulls/RespectNulls if the corresponding clause follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window suffixes after a function call: FILTER, WITHIN GROUP, OVER (...).

        Args:
            this: the expression the window attaches to.
            alias: True when parsing a named window (`name AS (spec)`), where no OVER
                keyword is present.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (reference to a named window).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
4539 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4540 return self._parse_window(window, alias=alias) 4541 4542 return window 4543 4544 def _parse_partition_and_order( 4545 self, 4546 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4547 return self._parse_partition_by(), self._parse_order() 4548 4549 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4550 self._match(TokenType.BETWEEN) 4551 4552 return { 4553 "value": ( 4554 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4555 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4556 or self._parse_bitwise() 4557 ), 4558 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4559 } 4560 4561 def _parse_alias( 4562 self, this: t.Optional[exp.Expression], explicit: bool = False 4563 ) -> t.Optional[exp.Expression]: 4564 any_token = self._match(TokenType.ALIAS) 4565 4566 if explicit and not any_token: 4567 return this 4568 4569 if self._match(TokenType.L_PAREN): 4570 aliases = self.expression( 4571 exp.Aliases, 4572 this=this, 4573 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4574 ) 4575 self._match_r_paren(aliases) 4576 return aliases 4577 4578 alias = self._parse_id_var(any_token) 4579 4580 if alias: 4581 return self.expression(exp.Alias, this=this, alias=alias) 4582 4583 return this 4584 4585 def _parse_id_var( 4586 self, 4587 any_token: bool = True, 4588 tokens: t.Optional[t.Collection[TokenType]] = None, 4589 ) -> t.Optional[exp.Expression]: 4590 identifier = self._parse_identifier() 4591 4592 if identifier: 4593 return identifier 4594 4595 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4596 quoted = self._prev.token_type == TokenType.STRING 4597 return exp.Identifier(this=self._prev.text, quoted=quoted) 4598 4599 return None 4600 4601 def _parse_string(self) -> t.Optional[exp.Expression]: 4602 if self._match(TokenType.STRING): 4603 return 
self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4604 return self._parse_placeholder() 4605 4606 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4607 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4608 4609 def _parse_number(self) -> t.Optional[exp.Expression]: 4610 if self._match(TokenType.NUMBER): 4611 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4612 return self._parse_placeholder() 4613 4614 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4615 if self._match(TokenType.IDENTIFIER): 4616 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4617 return self._parse_placeholder() 4618 4619 def _parse_var( 4620 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4621 ) -> t.Optional[exp.Expression]: 4622 if ( 4623 (any_token and self._advance_any()) 4624 or self._match(TokenType.VAR) 4625 or (self._match_set(tokens) if tokens else False) 4626 ): 4627 return self.expression(exp.Var, this=self._prev.text) 4628 return self._parse_placeholder() 4629 4630 def _advance_any(self) -> t.Optional[Token]: 4631 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4632 self._advance() 4633 return self._prev 4634 return None 4635 4636 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4637 return self._parse_var() or self._parse_string() 4638 4639 def _parse_null(self) -> t.Optional[exp.Expression]: 4640 if self._match_set(self.NULL_TOKENS): 4641 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4642 return self._parse_placeholder() 4643 4644 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4645 if self._match(TokenType.TRUE): 4646 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4647 if self._match(TokenType.FALSE): 4648 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4649 return self._parse_placeholder() 4650 4651 def _parse_star(self) -> 
    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces (e.g. @{name})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token via PLACEHOLDER_PARSERS; rewind on failure."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The matched token didn't produce a placeholder; undo the match.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse SELECT * EXCEPT (cols) / EXCEPT col."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse SELECT * REPLACE (exprs) / REPLACE expr."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a *sep*-separated list using *parse_method*; None results are dropped."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments trailing the separator to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators: parse operands with *parse_method* and combine
        them with the expression type mapped from each matched operator token."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized, *sep*-separated list using *parse_method*."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run *parse_method* inside parentheses; error if they are missing and required."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated expression list."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, else an (optionally aliased) expression with set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    # NOTE(review): _parse_transaction continues past the end of this chunk; its
    # visible code is reproduced unchanged up to the cut point.
    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append("
".join(mode)) 4764 if not self._match(TokenType.COMMA): 4765 break 4766 4767 return self.expression(exp.Transaction, this=this, modes=modes) 4768 4769 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4770 chain = None 4771 savepoint = None 4772 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4773 4774 self._match_texts({"TRANSACTION", "WORK"}) 4775 4776 if self._match_text_seq("TO"): 4777 self._match_text_seq("SAVEPOINT") 4778 savepoint = self._parse_id_var() 4779 4780 if self._match(TokenType.AND): 4781 chain = not self._match_text_seq("NO") 4782 self._match_text_seq("CHAIN") 4783 4784 if is_rollback: 4785 return self.expression(exp.Rollback, savepoint=savepoint) 4786 4787 return self.expression(exp.Commit, chain=chain) 4788 4789 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4790 if not self._match_text_seq("ADD"): 4791 return None 4792 4793 self._match(TokenType.COLUMN) 4794 exists_column = self._parse_exists(not_=True) 4795 expression = self._parse_field_def() 4796 4797 if expression: 4798 expression.set("exists", exists_column) 4799 4800 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4801 if self._match_texts(("FIRST", "AFTER")): 4802 position = self._prev.text 4803 column_position = self.expression( 4804 exp.ColumnPosition, this=self._parse_column(), position=position 4805 ) 4806 expression.set("position", column_position) 4807 4808 return expression 4809 4810 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4811 drop = self._match(TokenType.DROP) and self._parse_drop() 4812 if drop and not isinstance(drop, exp.Command): 4813 drop.set("kind", drop.args.get("kind", "COLUMN")) 4814 return drop 4815 4816 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4817 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4818 return self.expression( 4819 exp.DropPartition, 
expressions=self._parse_csv(self._parse_partition), exists=exists 4820 ) 4821 4822 def _parse_add_constraint(self) -> exp.AddConstraint: 4823 this = None 4824 kind = self._prev.token_type 4825 4826 if kind == TokenType.CONSTRAINT: 4827 this = self._parse_id_var() 4828 4829 if self._match_text_seq("CHECK"): 4830 expression = self._parse_wrapped(self._parse_conjunction) 4831 enforced = self._match_text_seq("ENFORCED") 4832 4833 return self.expression( 4834 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4835 ) 4836 4837 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4838 expression = self._parse_foreign_key() 4839 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4840 expression = self._parse_primary_key() 4841 else: 4842 expression = None 4843 4844 return self.expression(exp.AddConstraint, this=this, expression=expression) 4845 4846 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4847 index = self._index - 1 4848 4849 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4850 return self._parse_csv(self._parse_add_constraint) 4851 4852 self._retreat(index) 4853 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4854 return self._parse_csv(self._parse_field_def) 4855 4856 return self._parse_csv(self._parse_add_column) 4857 4858 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4859 self._match(TokenType.COLUMN) 4860 column = self._parse_field(any_token=True) 4861 4862 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4863 return self.expression(exp.AlterColumn, this=column, drop=True) 4864 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4865 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4866 4867 self._match_text_seq("SET", "DATA") 4868 return self.expression( 4869 exp.AlterColumn, 4870 this=column, 4871 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4872 collate=self._match(TokenType.COLLATE) 
and self._parse_term(), 4873 using=self._match(TokenType.USING) and self._parse_conjunction(), 4874 ) 4875 4876 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4877 index = self._index - 1 4878 4879 partition_exists = self._parse_exists() 4880 if self._match(TokenType.PARTITION, advance=False): 4881 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4882 4883 self._retreat(index) 4884 return self._parse_csv(self._parse_drop_column) 4885 4886 def _parse_alter_table_rename(self) -> exp.RenameTable: 4887 self._match_text_seq("TO") 4888 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4889 4890 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4891 start = self._prev 4892 4893 if not self._match(TokenType.TABLE): 4894 return self._parse_as_command(start) 4895 4896 exists = self._parse_exists() 4897 only = self._match_text_seq("ONLY") 4898 this = self._parse_table(schema=True) 4899 4900 if self._next: 4901 self._advance() 4902 4903 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4904 if parser: 4905 actions = ensure_list(parser(self)) 4906 4907 if not self._curr: 4908 return self.expression( 4909 exp.AlterTable, 4910 this=this, 4911 exists=exists, 4912 actions=actions, 4913 only=only, 4914 ) 4915 4916 return self._parse_as_command(start) 4917 4918 def _parse_merge(self) -> exp.Merge: 4919 self._match(TokenType.INTO) 4920 target = self._parse_table() 4921 4922 if target and self._match(TokenType.ALIAS, advance=False): 4923 target.set("alias", self._parse_table_alias()) 4924 4925 self._match(TokenType.USING) 4926 using = self._parse_table() 4927 4928 self._match(TokenType.ON) 4929 on = self._parse_conjunction() 4930 4931 whens = [] 4932 while self._match(TokenType.WHEN): 4933 matched = not self._match(TokenType.NOT) 4934 self._match_text_seq("MATCHED") 4935 source = ( 4936 False 4937 if self._match_text_seq("BY", "TARGET") 4938 else self._match_text_seq("BY", 
"SOURCE") 4939 ) 4940 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4941 4942 self._match(TokenType.THEN) 4943 4944 if self._match(TokenType.INSERT): 4945 _this = self._parse_star() 4946 if _this: 4947 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4948 else: 4949 then = self.expression( 4950 exp.Insert, 4951 this=self._parse_value(), 4952 expression=self._match(TokenType.VALUES) and self._parse_value(), 4953 ) 4954 elif self._match(TokenType.UPDATE): 4955 expressions = self._parse_star() 4956 if expressions: 4957 then = self.expression(exp.Update, expressions=expressions) 4958 else: 4959 then = self.expression( 4960 exp.Update, 4961 expressions=self._match(TokenType.SET) 4962 and self._parse_csv(self._parse_equality), 4963 ) 4964 elif self._match(TokenType.DELETE): 4965 then = self.expression(exp.Var, this=self._prev.text) 4966 else: 4967 then = None 4968 4969 whens.append( 4970 self.expression( 4971 exp.When, 4972 matched=matched, 4973 source=source, 4974 condition=condition, 4975 then=then, 4976 ) 4977 ) 4978 4979 return self.expression( 4980 exp.Merge, 4981 this=target, 4982 using=using, 4983 on=on, 4984 expressions=whens, 4985 ) 4986 4987 def _parse_show(self) -> t.Optional[exp.Expression]: 4988 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4989 if parser: 4990 return parser(self) 4991 return self._parse_as_command(self._prev) 4992 4993 def _parse_set_item_assignment( 4994 self, kind: t.Optional[str] = None 4995 ) -> t.Optional[exp.Expression]: 4996 index = self._index 4997 4998 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4999 return self._parse_set_transaction(global_=kind == "GLOBAL") 5000 5001 left = self._parse_primary() or self._parse_id_var() 5002 assignment_delimiter = self._match_texts(("=", "TO")) 5003 5004 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5005 self._retreat(index) 5006 return None 5007 5008 
right = self._parse_statement() or self._parse_id_var() 5009 this = self.expression(exp.EQ, this=left, expression=right) 5010 5011 return self.expression(exp.SetItem, this=this, kind=kind) 5012 5013 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5014 self._match_text_seq("TRANSACTION") 5015 characteristics = self._parse_csv( 5016 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5017 ) 5018 return self.expression( 5019 exp.SetItem, 5020 expressions=characteristics, 5021 kind="TRANSACTION", 5022 **{"global": global_}, # type: ignore 5023 ) 5024 5025 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5026 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5027 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5028 5029 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5030 index = self._index 5031 set_ = self.expression( 5032 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5033 ) 5034 5035 if self._curr: 5036 self._retreat(index) 5037 return self._parse_as_command(self._prev) 5038 5039 return set_ 5040 5041 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5042 for option in options: 5043 if self._match_text_seq(*option.split(" ")): 5044 return exp.var(option) 5045 return None 5046 5047 def _parse_as_command(self, start: Token) -> exp.Command: 5048 while self._curr: 5049 self._advance() 5050 text = self._find_sql(start, self._prev) 5051 size = len(start.text) 5052 return exp.Command(this=text[:size], expression=text[size:]) 5053 5054 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5055 settings = [] 5056 5057 self._match_l_paren() 5058 kind = self._parse_id_var() 5059 5060 if self._match(TokenType.L_PAREN): 5061 while True: 5062 key = self._parse_id_var() 5063 value = self._parse_primary() 5064 5065 if not key and value is None: 5066 break 5067 
settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5068 self._match(TokenType.R_PAREN) 5069 5070 self._match_r_paren() 5071 5072 return self.expression( 5073 exp.DictProperty, 5074 this=this, 5075 kind=kind.this if kind else None, 5076 settings=settings, 5077 ) 5078 5079 def _parse_dict_range(self, this: str) -> exp.DictRange: 5080 self._match_l_paren() 5081 has_min = self._match_text_seq("MIN") 5082 if has_min: 5083 min = self._parse_var() or self._parse_primary() 5084 self._match_text_seq("MAX") 5085 max = self._parse_var() or self._parse_primary() 5086 else: 5087 max = self._parse_var() or self._parse_primary() 5088 min = exp.Literal.number(0) 5089 self._match_r_paren() 5090 return self.expression(exp.DictRange, this=this, min=min, max=max) 5091 5092 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5093 index = self._index 5094 expression = self._parse_column() 5095 if not self._match(TokenType.IN): 5096 self._retreat(index - 1) 5097 return None 5098 iterator = self._parse_column() 5099 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5100 return self.expression( 5101 exp.Comprehension, 5102 this=this, 5103 expression=expression, 5104 iterator=iterator, 5105 condition=condition, 5106 ) 5107 5108 def _find_parser( 5109 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5110 ) -> t.Optional[t.Callable]: 5111 if not self._curr: 5112 return None 5113 5114 index = self._index 5115 this = [] 5116 while True: 5117 # The current token might be multiple words 5118 curr = self._curr.text.upper() 5119 key = curr.split(" ") 5120 this.append(curr) 5121 5122 self._advance() 5123 result, trie = in_trie(trie, key) 5124 if result == TrieResult.FAILED: 5125 break 5126 5127 if result == TrieResult.EXISTS: 5128 subparser = parsers[" ".join(this)] 5129 return subparser 5130 5131 self._retreat(index) 5132 return None 5133 5134 def _match(self, token_type, advance=True, expression=None): 
5135 if not self._curr: 5136 return None 5137 5138 if self._curr.token_type == token_type: 5139 if advance: 5140 self._advance() 5141 self._add_comments(expression) 5142 return True 5143 5144 return None 5145 5146 def _match_set(self, types, advance=True): 5147 if not self._curr: 5148 return None 5149 5150 if self._curr.token_type in types: 5151 if advance: 5152 self._advance() 5153 return True 5154 5155 return None 5156 5157 def _match_pair(self, token_type_a, token_type_b, advance=True): 5158 if not self._curr or not self._next: 5159 return None 5160 5161 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5162 if advance: 5163 self._advance(2) 5164 return True 5165 5166 return None 5167 5168 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5169 if not self._match(TokenType.L_PAREN, expression=expression): 5170 self.raise_error("Expecting (") 5171 5172 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5173 if not self._match(TokenType.R_PAREN, expression=expression): 5174 self.raise_error("Expecting )") 5175 5176 def _match_texts(self, texts, advance=True): 5177 if self._curr and self._curr.text.upper() in texts: 5178 if advance: 5179 self._advance() 5180 return True 5181 return False 5182 5183 def _match_text_seq(self, *texts, advance=True): 5184 index = self._index 5185 for text in texts: 5186 if self._curr and self._curr.text.upper() == text: 5187 self._advance() 5188 else: 5189 self._retreat(index) 5190 return False 5191 5192 if not advance: 5193 self._retreat(index) 5194 5195 return True 5196 5197 @t.overload 5198 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5199 ... 5200 5201 @t.overload 5202 def _replace_columns_with_dots( 5203 self, this: t.Optional[exp.Expression] 5204 ) -> t.Optional[exp.Expression]: 5205 ... 
5206 5207 def _replace_columns_with_dots(self, this): 5208 if isinstance(this, exp.Dot): 5209 exp.replace_children(this, self._replace_columns_with_dots) 5210 elif isinstance(this, exp.Column): 5211 exp.replace_children(this, self._replace_columns_with_dots) 5212 table = this.args.get("table") 5213 this = ( 5214 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5215 ) 5216 5217 return this 5218 5219 def _replace_lambda( 5220 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5221 ) -> t.Optional[exp.Expression]: 5222 if not node: 5223 return node 5224 5225 for column in node.find_all(exp.Column): 5226 if column.parts[0].name in lambda_variables: 5227 dot_or_id = column.to_dot() if column.table else column.this 5228 parent = column.parent 5229 5230 while isinstance(parent, exp.Dot): 5231 if not isinstance(parent.parent, exp.Dot): 5232 parent.replace(dot_or_id) 5233 break 5234 parent = parent.parent 5235 else: 5236 if column is node: 5237 node = dot_or_id 5238 else: 5239 column.replace(dot_or_id) 5240 return node 5241 5242 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5243 return [ 5244 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5245 for value in values 5246 if value 5247 ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Args:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
):
    """
    Initialize parser configuration and reset the per-parse state.

    Args:
        error_level: The desired error level; defaults to ErrorLevel.IMMEDIATE.
        error_message_context: Number of characters of query context captured
            when displaying an error message.
        max_errors: Maximum number of error messages included in a raised
            ParseError (only relevant for ErrorLevel.RAISE).
    """
    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    # TOKENIZER_CLASS is a class attribute, so dialect subclasses can swap tokenizers.
    self._tokenizer = self.TOKENIZER_CLASS()
    # reset() presumably initializes the mutable parse state (sql, errors, cursor) —
    # defined elsewhere in the class; must run last so it sees the config above.
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    # Passes the unbound class attribute, presumably so dialect subclasses'
    # _parse_statement override takes effect — confirm against _parse.
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Args:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    caught: t.List[ParseError] = []

    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if parser is None:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as error:
            # Record which target type this failure belongs to, then try the next one.
            error.errors[0]["into_expression"] = expression_type
            caught.append(error)

    # Every candidate type failed — surface all accumulated errors together,
    # chained to the most recent failure.
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(caught),
    ) from caught[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Args:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if not self.errors:
        # Nothing recorded — neither WARN nor RAISE has anything to report.
        return

    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    # Anchor the error on an explicit token if given, else the current or previous one.
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    # The offending span is underlined with ANSI escape codes (\033[4m ... \033[0m).
    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    # Fix: the original evaluated a conditional expression purely for its side
    # effects (`a() if c else b()`); an explicit if/else states the intent.
    if comments:
        instance.add_comments(comments)
    else:
        # No explicit comments — adopt any comments buffered on the parser.
        self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Args:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    # Guard clause: in IGNORE mode, validation is skipped entirely.
    if self.error_level == ErrorLevel.IGNORE:
        return expression

    for error_message in expression.error_messages(args):
        self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.