sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )
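
# Illustrative sketch, not part of the original module: the helpers above build
# expression nodes from flat argument lists. For example, parse_var_map pairs
# alternating arguments into keys and values:
#
#   parse_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#   # -> VarMap(keys=Array('a'), values=Array(1))
#
# while parse_like swaps the argument order (Like(this=args[1], expression=args[0]))
# and wraps the result in an Escape node when a third argument is present.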

class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }
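
    # Illustrative sketch, not part of the original module: FUNCTIONS maps an
    # upper-case SQL function name to a callable that builds the matching
    # expression from an already-parsed argument list. Most names fall through to
    # each Func class's from_arg_list; the overrides above reshape arguments, e.g.:
    #
    #   args = [exp.column("x"), exp.Literal.string("*a*")]
    #   Parser.FUNCTIONS["GLOB"](args)
    #   # -> Glob(this=args[1], expression=args[0]), i.e. the operand order is swapped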

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
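
    # Illustrative note, not part of the original module: COLUMN_OPERATORS drives
    # postfix parsing after a column. The double colon in "x::INT" dispatches to the
    # DCOLON entry, producing exp.Cast (or exp.TryCast in dialects that set
    # STRICT_CAST = False), while "->" and "->>" build JSONExtract/JSONExtractScalar
    # nodes with the column as `this` and the path as `expression`.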

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }
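
    # Illustrative note, not part of the original module: _parse_statement (defined
    # below) dispatches on a statement's first token through STATEMENT_PARSERS, so a
    # leading CREATE token routes to _parse_create, while a SELECT falls through to
    # the generic expression path. PRIMARY_PARSERS plays the same role one level
    # down, turning single tokens into leaf nodes, e.g. a NUMBER token "1" becomes
    # exp.Literal(this="1", is_string=False).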

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }
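
    # Illustrative note, not part of the original module: the entries built with
    # binary_range_parser (module level, above) parse the right-hand side with
    # _parse_bitwise and then look for a trailing ESCAPE clause, so an input like
    # "x LIKE 'a%' ESCAPE '!'" parses as Escape(this=Like(...), expression='!').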

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
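
    # Illustrative note, not part of the original module: PROPERTY_PARSERS keys are
    # matched textually when parsing DDL properties, e.g. "ENGINE = InnoDB" hits the
    # "ENGINE" entry and becomes EngineProperty(this=...) via
    # _parse_property_assignment; unrecognized "key = value" pairs fall back to the
    # generic exp.Property branch of _parse_property (defined further below).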

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    SUPPORTS_USER_DEFINED_TYPES = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
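
    # Illustrative sketch, not part of the original module: a Parser is cheap to
    # construct and reusable, e.g.
    #
    #   parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
    #   parser.parse(Tokenizer().tokenize("SELECT 1"))
    #
    # reset() runs both here in __init__ and at the start of every _parse call, so
    # no state leaks from one statement to the next.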

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
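
    # Illustrative sketch, not part of the original module: parse_into coerces a
    # fragment into a specific node type via EXPRESSION_PARSERS, e.g.
    #
    #   Parser().parse_into(exp.Table, Tokenizer().tokenize("foo.bar"))
    #   # -> [Table(...)] parsed through the exp.Table entry (_parse_table_parts)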

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)
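
    # Illustrative note, not part of the original module: _advance/_retreat move a
    # cursor over the token list while keeping _curr, _next and _prev in sync. A
    # speculative parse can save self._index, try a branch, and _retreat(index) to
    # rewind when the branch fails; _prev_comments carries comments forward so they
    # can be attached to the next expression that gets built.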

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
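
    # Illustrative note, not part of the original module: _parse_exists only returns
    # a truthy value when the full "IF [NOT] EXISTS" sequence is present, e.g. in
    # "DROP TABLE IF EXISTS t" it consumes IF and EXISTS, and _parse_drop stores the
    # result in the Drop node's `exists` arg.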

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
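
    # Illustrative note, not part of the original module: extend_props (above)
    # merges the property lists parsed at the different locations dialects allow
    # (POST_CREATE, POST_NAME, POST_SCHEMA, POST_ALIAS, ...), so in a statement like
    # "CREATE TABLE t (a INT) ENGINE=x AS SELECT 1" the ENGINE property parsed after
    # the schema lands in the same exp.Properties node as any parsed earlier.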

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(
                exp.Property,
                this=key,
                value=self._parse_column() or self._parse_var(any_token=True),
            )

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_id_var())

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)
returning = self._parse_returning() 1769 1770 return self.expression( 1771 exp.Insert, 1772 comments=comments, 1773 this=this, 1774 exists=self._parse_exists(), 1775 partition=self._parse_partition(), 1776 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1777 and self._parse_conjunction(), 1778 expression=self._parse_ddl_select(), 1779 conflict=self._parse_on_conflict(), 1780 returning=returning or self._parse_returning(), 1781 overwrite=overwrite, 1782 alternative=alternative, 1783 ignore=ignore, 1784 ) 1785 1786 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1787 conflict = self._match_text_seq("ON", "CONFLICT") 1788 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1789 1790 if not conflict and not duplicate: 1791 return None 1792 1793 nothing = None 1794 expressions = None 1795 key = None 1796 constraint = None 1797 1798 if conflict: 1799 if self._match_text_seq("ON", "CONSTRAINT"): 1800 constraint = self._parse_id_var() 1801 else: 1802 key = self._parse_csv(self._parse_value) 1803 1804 self._match_text_seq("DO") 1805 if self._match_text_seq("NOTHING"): 1806 nothing = True 1807 else: 1808 self._match(TokenType.UPDATE) 1809 self._match(TokenType.SET) 1810 expressions = self._parse_csv(self._parse_equality) 1811 1812 return self.expression( 1813 exp.OnConflict, 1814 duplicate=duplicate, 1815 expressions=expressions, 1816 nothing=nothing, 1817 key=key, 1818 constraint=constraint, 1819 ) 1820 1821 def _parse_returning(self) -> t.Optional[exp.Returning]: 1822 if not self._match(TokenType.RETURNING): 1823 return None 1824 return self.expression( 1825 exp.Returning, 1826 expressions=self._parse_csv(self._parse_expression), 1827 into=self._match(TokenType.INTO) and self._parse_table_part(), 1828 ) 1829 1830 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1831 if not self._match(TokenType.FORMAT): 1832 return None 1833 return self._parse_row_format() 1834 1835 def _parse_row_format( 1836 self, match_row: bool = False 1837 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1838 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1839 return None 1840 1841 if self._match_text_seq("SERDE"): 1842 this = self._parse_string() 1843 1844 serde_properties = None 1845 if self._match(TokenType.SERDE_PROPERTIES): 1846 serde_properties = self.expression( 1847 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1848 ) 1849 1850 return self.expression( 1851 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1852 ) 1853 1854 self._match_text_seq("DELIMITED") 1855 1856 kwargs = {} 1857 1858 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1859 kwargs["fields"] = self._parse_string() 1860 if self._match_text_seq("ESCAPED", "BY"): 1861 kwargs["escaped"] = self._parse_string() 1862 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1863 kwargs["collection_items"] = self._parse_string() 1864 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1865 kwargs["map_keys"] = self._parse_string() 1866 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1867 kwargs["lines"] = self._parse_string() 1868 if self._match_text_seq("NULL", "DEFINED", "AS"): 1869 kwargs["null"] = self._parse_string() 1870 1871 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1872 1873 def _parse_load(self) -> exp.LoadData | exp.Command: 1874 if self._match_text_seq("DATA"): 1875 local = 
self._match_text_seq("LOCAL") 1876 self._match_text_seq("INPATH") 1877 inpath = self._parse_string() 1878 overwrite = self._match(TokenType.OVERWRITE) 1879 self._match_pair(TokenType.INTO, TokenType.TABLE) 1880 1881 return self.expression( 1882 exp.LoadData, 1883 this=self._parse_table(schema=True), 1884 local=local, 1885 overwrite=overwrite, 1886 inpath=inpath, 1887 partition=self._parse_partition(), 1888 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1889 serde=self._match_text_seq("SERDE") and self._parse_string(), 1890 ) 1891 return self._parse_as_command(self._prev) 1892 1893 def _parse_delete(self) -> exp.Delete: 1894 # This handles MySQL's "Multiple-Table Syntax" 1895 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1896 tables = None 1897 comments = self._prev_comments 1898 if not self._match(TokenType.FROM, advance=False): 1899 tables = self._parse_csv(self._parse_table) or None 1900 1901 returning = self._parse_returning() 1902 1903 return self.expression( 1904 exp.Delete, 1905 comments=comments, 1906 tables=tables, 1907 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1908 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1909 where=self._parse_where(), 1910 returning=returning or self._parse_returning(), 1911 limit=self._parse_limit(), 1912 ) 1913 1914 def _parse_update(self) -> exp.Update: 1915 comments = self._prev_comments 1916 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1917 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1918 returning = self._parse_returning() 1919 return self.expression( 1920 exp.Update, 1921 comments=comments, 1922 **{ # type: ignore 1923 "this": this, 1924 "expressions": expressions, 1925 "from": self._parse_from(joins=True), 1926 "where": self._parse_where(), 1927 "returning": returning or self._parse_returning(), 1928 "limit": self._parse_limit(), 1929 }, 1930 ) 1931 1932 def _parse_uncache(self) -> exp.Uncache: 1933 if not self._match(TokenType.TABLE): 1934 self.raise_error("Expecting TABLE after UNCACHE") 1935 1936 return self.expression( 1937 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1938 ) 1939 1940 def _parse_cache(self) -> exp.Cache: 1941 lazy = self._match_text_seq("LAZY") 1942 self._match(TokenType.TABLE) 1943 table = self._parse_table(schema=True) 1944 1945 options = [] 1946 if self._match_text_seq("OPTIONS"): 1947 self._match_l_paren() 1948 k = self._parse_string() 1949 self._match(TokenType.EQ) 1950 v = self._parse_string() 1951 options = [k, v] 1952 self._match_r_paren() 1953 1954 self._match(TokenType.ALIAS) 1955 return self.expression( 1956 exp.Cache, 1957 this=table, 1958 lazy=lazy, 1959 options=options, 1960 expression=self._parse_select(nested=True), 1961 ) 1962 1963 def _parse_partition(self) -> t.Optional[exp.Partition]: 1964 if not self._match(TokenType.PARTITION): 1965 return None 1966 1967 return self.expression( 1968 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1969 ) 1970 1971 def _parse_value(self) -> exp.Tuple: 1972 if self._match(TokenType.L_PAREN): 1973 expressions = self._parse_csv(self._parse_conjunction) 1974 self._match_r_paren() 1975 return self.expression(exp.Tuple, expressions=expressions) 1976 1977 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
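# Each bare expression therefore becomes its own single-column Tuple (one row).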
1978 # https://prestodb.io/docs/current/sql/values.html 1979 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1980 1981 def _parse_projections(self) -> t.List[exp.Expression]: 1982 return self._parse_expressions() 1983 1984 def _parse_select( 1985 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1986 ) -> t.Optional[exp.Expression]: 1987 cte = self._parse_with() 1988 if cte: 1989 this = self._parse_statement() 1990 1991 if not this: 1992 self.raise_error("Failed to parse any statement following CTE") 1993 return cte 1994 1995 if "with" in this.arg_types: 1996 this.set("with", cte) 1997 else: 1998 self.raise_error(f"{this.key} does not support CTE") 1999 this = cte 2000 elif self._match(TokenType.SELECT): 2001 comments = self._prev_comments 2002 2003 hint = self._parse_hint() 2004 all_ = self._match(TokenType.ALL) 2005 distinct = self._match_set(self.DISTINCT_TOKENS) 2006 2007 kind = ( 2008 self._match(TokenType.ALIAS) 2009 and self._match_texts(("STRUCT", "VALUE")) 2010 and self._prev.text 2011 ) 2012 2013 if distinct: 2014 distinct = self.expression( 2015 exp.Distinct, 2016 on=self._parse_value() if self._match(TokenType.ON) else None, 2017 ) 2018 2019 if all_ and distinct: 2020 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2021 2022 limit = self._parse_limit(top=True) 2023 projections = self._parse_projections() 2024 2025 this = self.expression( 2026 exp.Select, 2027 kind=kind, 2028 hint=hint, 2029 distinct=distinct, 2030 expressions=projections, 2031 limit=limit, 2032 ) 2033 this.comments = comments 2034 2035 into = self._parse_into() 2036 if into: 2037 this.set("into", into) 2038 2039 from_ = self._parse_from() 2040 if from_: 2041 this.set("from", from_) 2042 2043 this = self._parse_query_modifiers(this) 2044 elif (table or nested) and self._match(TokenType.L_PAREN): 2045 if self._match(TokenType.PIVOT): 2046 this = self._parse_simplified_pivot() 2047 elif self._match(TokenType.FROM): 2048 this = exp.select("*").from_( 2049 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2050 ) 2051 else: 2052 this = self._parse_table() if table else self._parse_select(nested=True) 2053 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2054 2055 self._match_r_paren() 2056 2057 # We return early here so that the UNION isn't attached to the subquery by the 2058 # following call to _parse_set_operations, but instead becomes the parent node 2059 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2060 elif self._match(TokenType.VALUES): 2061 this = self.expression( 2062 exp.Values, 2063 expressions=self._parse_csv(self._parse_value), 2064 alias=self._parse_table_alias(), 2065 ) 2066 else: 2067 this = None 2068 2069 return self._parse_set_operations(this) 2070 2071 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2072 if not skip_with_token and not self._match(TokenType.WITH): 2073 return None 2074 2075 comments = self._prev_comments 2076 recursive = self._match(TokenType.RECURSIVE) 2077 2078 expressions = [] 2079 while True: 2080 expressions.append(self._parse_cte()) 2081 2082 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2083 break 2084 else: 2085 self._match(TokenType.WITH) 2086 2087 return self.expression( 2088 exp.With, comments=comments, expressions=expressions, recursive=recursive 2089 ) 2090 2091 def _parse_cte(self) -> exp.CTE: 2092 alias = self._parse_table_alias() 2093 if not alias or not alias.this: 2094 
self.raise_error("Expected CTE to have alias") 2095 2096 self._match(TokenType.ALIAS) 2097 return self.expression( 2098 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2099 ) 2100 2101 def _parse_table_alias( 2102 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2103 ) -> t.Optional[exp.TableAlias]: 2104 any_token = self._match(TokenType.ALIAS) 2105 alias = ( 2106 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2107 or self._parse_string_as_identifier() 2108 ) 2109 2110 index = self._index 2111 if self._match(TokenType.L_PAREN): 2112 columns = self._parse_csv(self._parse_function_parameter) 2113 self._match_r_paren() if columns else self._retreat(index) 2114 else: 2115 columns = None 2116 2117 if not alias and not columns: 2118 return None 2119 2120 return self.expression(exp.TableAlias, this=alias, columns=columns) 2121 2122 def _parse_subquery( 2123 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2124 ) -> t.Optional[exp.Subquery]: 2125 if not this: 2126 return None 2127 2128 return self.expression( 2129 exp.Subquery, 2130 this=this, 2131 pivots=self._parse_pivots(), 2132 alias=self._parse_table_alias() if parse_alias else None, 2133 ) 2134 2135 def _parse_query_modifiers( 2136 self, this: t.Optional[exp.Expression] 2137 ) -> t.Optional[exp.Expression]: 2138 if isinstance(this, self.MODIFIABLES): 2139 for join in iter(self._parse_join, None): 2140 this.append("joins", join) 2141 for lateral in iter(self._parse_lateral, None): 2142 this.append("laterals", lateral) 2143 2144 while True: 2145 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2146 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2147 key, expression = parser(self) 2148 2149 if expression: 2150 this.set(key, expression) 2151 if key == "limit": 2152 offset = expression.args.pop("offset", None) 2153 if offset: 2154 this.set("offset", exp.Offset(expression=offset)) 2155 continue 2156 break 2157 return this 2158 2159 def _parse_hint(self) -> t.Optional[exp.Hint]: 2160 if self._match(TokenType.HINT): 2161 hints = [] 2162 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2163 hints.extend(hint) 2164 2165 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2166 self.raise_error("Expected */ after HINT") 2167 2168 return self.expression(exp.Hint, expressions=hints) 2169 2170 return None 2171 2172 def _parse_into(self) -> t.Optional[exp.Into]: 2173 if not self._match(TokenType.INTO): 2174 return None 2175 2176 temp = self._match(TokenType.TEMPORARY) 2177 unlogged = self._match_text_seq("UNLOGGED") 2178 self._match(TokenType.TABLE) 2179 2180 return self.expression( 2181 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2182 ) 2183 2184 def _parse_from( 2185 self, joins: bool = False, skip_from_token: bool = False 2186 ) -> t.Optional[exp.From]: 2187 if not skip_from_token and not self._match(TokenType.FROM): 2188 return None 2189 2190 return self.expression( 2191 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2192 ) 2193 2194 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2195 if not self._match(TokenType.MATCH_RECOGNIZE): 2196 return None 2197 2198 self._match_l_paren() 2199 2200 partition = self._parse_partition_by() 2201 order = self._parse_order() 2202 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2203 2204 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2205 rows = 
exp.var("ONE ROW PER MATCH") 2206 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2207 text = "ALL ROWS PER MATCH" 2208 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2209 text += f" SHOW EMPTY MATCHES" 2210 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2211 text += f" OMIT EMPTY MATCHES" 2212 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2213 text += f" WITH UNMATCHED ROWS" 2214 rows = exp.var(text) 2215 else: 2216 rows = None 2217 2218 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2219 text = "AFTER MATCH SKIP" 2220 if self._match_text_seq("PAST", "LAST", "ROW"): 2221 text += f" PAST LAST ROW" 2222 elif self._match_text_seq("TO", "NEXT", "ROW"): 2223 text += f" TO NEXT ROW" 2224 elif self._match_text_seq("TO", "FIRST"): 2225 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2226 elif self._match_text_seq("TO", "LAST"): 2227 text += f" TO LAST {self._advance_any().text}" # type: ignore 2228 after = exp.var(text) 2229 else: 2230 after = None 2231 2232 if self._match_text_seq("PATTERN"): 2233 self._match_l_paren() 2234 2235 if not self._curr: 2236 self.raise_error("Expecting )", self._curr) 2237 2238 paren = 1 2239 start = self._curr 2240 2241 while self._curr and paren > 0: 2242 if self._curr.token_type == TokenType.L_PAREN: 2243 paren += 1 2244 if self._curr.token_type == TokenType.R_PAREN: 2245 paren -= 1 2246 2247 end = self._prev 2248 self._advance() 2249 2250 if paren > 0: 2251 self.raise_error("Expecting )", self._curr) 2252 2253 pattern = exp.var(self._find_sql(start, end)) 2254 else: 2255 pattern = None 2256 2257 define = ( 2258 self._parse_csv( 2259 lambda: self.expression( 2260 exp.Alias, 2261 alias=self._parse_id_var(any_token=True), 2262 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2263 ) 2264 ) 2265 if self._match_text_seq("DEFINE") 2266 else None 2267 ) 2268 2269 self._match_r_paren() 2270 2271 return self.expression( 2272 exp.MatchRecognize, 2273 partition_by=partition, 2274 order=order, 2275 measures=measures, 2276 rows=rows, 2277 after=after, 2278 pattern=pattern, 2279 define=define, 2280 alias=self._parse_table_alias(), 2281 ) 2282 2283 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2284 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2285 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2286 2287 if outer_apply or cross_apply: 2288 this = self._parse_select(table=True) 2289 view = None 2290 outer = not cross_apply 2291 elif self._match(TokenType.LATERAL): 2292 this = self._parse_select(table=True) 2293 view = self._match(TokenType.VIEW) 2294 outer = self._match(TokenType.OUTER) 2295 else: 2296 return None 2297 2298 if not this: 2299 this = ( 2300 self._parse_unnest() 2301 or self._parse_function() 2302 or self._parse_id_var(any_token=False) 2303 ) 2304 2305 while self._match(TokenType.DOT): 2306 this = exp.Dot( 2307 this=this, 2308 expression=self._parse_function() or self._parse_id_var(any_token=False), 2309 ) 2310 2311 if view: 2312 table = self._parse_id_var(any_token=False) 2313 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2314 table_alias: t.Optional[exp.TableAlias] = self.expression( 2315 exp.TableAlias, this=table, columns=columns 2316 ) 2317 elif isinstance(this, exp.Subquery) and this.alias: 2318 # Ensures parity between the Subquery's and the Lateral's "alias" args 2319 table_alias = this.args["alias"].copy() 2320 else: 2321 table_alias = self._parse_table_alias() 2322 2323 return self.expression(exp.Lateral, 
this=this, view=view, outer=outer, alias=table_alias) 2324 2325 def _parse_join_parts( 2326 self, 2327 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2328 return ( 2329 self._match_set(self.JOIN_METHODS) and self._prev, 2330 self._match_set(self.JOIN_SIDES) and self._prev, 2331 self._match_set(self.JOIN_KINDS) and self._prev, 2332 ) 2333 2334 def _parse_join( 2335 self, skip_join_token: bool = False, parse_bracket: bool = False 2336 ) -> t.Optional[exp.Join]: 2337 if self._match(TokenType.COMMA): 2338 return self.expression(exp.Join, this=self._parse_table()) 2339 2340 index = self._index 2341 method, side, kind = self._parse_join_parts() 2342 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2343 join = self._match(TokenType.JOIN) 2344 2345 if not skip_join_token and not join: 2346 self._retreat(index) 2347 kind = None 2348 method = None 2349 side = None 2350 2351 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2352 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2353 2354 if not skip_join_token and not join and not outer_apply and not cross_apply: 2355 return None 2356 2357 if outer_apply: 2358 side = Token(TokenType.LEFT, "LEFT") 2359 2360 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2361 2362 if method: 2363 kwargs["method"] = method.text 2364 if side: 2365 kwargs["side"] = side.text 2366 if kind: 2367 kwargs["kind"] = kind.text 2368 if hint: 2369 kwargs["hint"] = hint 2370 2371 if self._match(TokenType.ON): 2372 kwargs["on"] = self._parse_conjunction() 2373 elif self._match(TokenType.USING): 2374 kwargs["using"] = self._parse_wrapped_id_vars() 2375 elif not (kind and kind.token_type == TokenType.CROSS): 2376 index = self._index 2377 joins = self._parse_joins() 2378 2379 if joins and self._match(TokenType.ON): 2380 kwargs["on"] = self._parse_conjunction() 2381 elif joins and self._match(TokenType.USING): 2382 kwargs["using"] = self._parse_wrapped_id_vars() 2383 else: 2384 joins = None 2385 self._retreat(index) 2386 2387 kwargs["this"].set("joins", joins) 2388 2389 comments = [c for token in (method, side, kind) if token for c in token.comments] 2390 return self.expression(exp.Join, comments=comments, **kwargs) 2391 2392 def _parse_index( 2393 self, 2394 index: t.Optional[exp.Expression] = None, 2395 ) -> t.Optional[exp.Index]: 2396 if index: 2397 unique = None 2398 primary = None 2399 amp = None 2400 2401 self._match(TokenType.ON) 2402 self._match(TokenType.TABLE) # hive 2403 table = self._parse_table_parts(schema=True) 2404 else: 2405 unique = self._match(TokenType.UNIQUE) 2406 primary = self._match_text_seq("PRIMARY") 2407 amp = self._match_text_seq("AMP") 2408 2409 if not self._match(TokenType.INDEX): 2410 return None 2411 2412 index = self._parse_id_var() 2413 table = None 2414 2415 using = self._parse_field() if self._match(TokenType.USING) else None 2416 2417 if self._match(TokenType.L_PAREN, advance=False): 2418 columns = self._parse_wrapped_csv(self._parse_ordered) 2419 else: 2420 columns = None 2421 2422 return self.expression( 2423 exp.Index, 2424 this=index, 2425 table=table, 2426 using=using, 2427 columns=columns, 2428 unique=unique, 2429 primary=primary, 2430 amp=amp, 2431 partition_by=self._parse_partition_by(), 2432 ) 2433 2434 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2435 hints: t.List[exp.Expression] = [] 2436 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2437 # 
https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2438 hints.append( 2439 self.expression( 2440 exp.WithTableHint, 2441 expressions=self._parse_csv( 2442 lambda: self._parse_function() or self._parse_var(any_token=True) 2443 ), 2444 ) 2445 ) 2446 self._match_r_paren() 2447 else: 2448 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2449 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2450 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2451 2452 self._match_texts({"INDEX", "KEY"}) 2453 if self._match(TokenType.FOR): 2454 hint.set("target", self._advance_any() and self._prev.text.upper()) 2455 2456 hint.set("expressions", self._parse_wrapped_id_vars()) 2457 hints.append(hint) 2458 2459 return hints or None 2460 2461 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2462 return ( 2463 (not schema and self._parse_function(optional_parens=False)) 2464 or self._parse_id_var(any_token=False) 2465 or self._parse_string_as_identifier() 2466 or self._parse_placeholder() 2467 ) 2468 2469 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2470 catalog = None 2471 db = None 2472 table = self._parse_table_part(schema=schema) 2473 2474 while self._match(TokenType.DOT): 2475 if catalog: 2476 # This allows nesting the table in arbitrarily many dot expressions if needed 2477 table = self.expression( 2478 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2479 ) 2480 else: 2481 catalog = db 2482 db = table 2483 table = self._parse_table_part(schema=schema) 2484 2485 if not table: 2486 self.raise_error(f"Expected table name but got {self._curr}") 2487 2488 return self.expression( 2489 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2490 ) 2491 2492 def _parse_table( 2493 self, 2494 schema: bool = False, 2495 joins: bool = False, 2496 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2497 parse_bracket: bool = False, 2498 ) -> t.Optional[exp.Expression]: 2499 lateral = self._parse_lateral() 2500 if lateral: 2501 return lateral 2502 2503 unnest = self._parse_unnest() 2504 if unnest: 2505 return unnest 2506 2507 values = self._parse_derived_table_values() 2508 if values: 2509 return values 2510 2511 subquery = self._parse_select(table=True) 2512 if subquery: 2513 if not subquery.args.get("pivots"): 2514 subquery.set("pivots", self._parse_pivots()) 2515 return subquery 2516 2517 bracket = parse_bracket and self._parse_bracket(None) 2518 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2519 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2520 2521 if schema: 2522 return self._parse_schema(this=this) 2523 2524 if self.ALIAS_POST_TABLESAMPLE: 2525 table_sample = self._parse_table_sample() 2526 2527 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2528 if alias: 2529 this.set("alias", alias) 2530 2531 if not this.args.get("pivots"): 2532 this.set("pivots", self._parse_pivots()) 2533 2534 this.set("hints", self._parse_table_hints()) 2535 2536 if not self.ALIAS_POST_TABLESAMPLE: 2537 table_sample = self._parse_table_sample() 2538 2539 if table_sample: 2540 table_sample.set("this", this) 2541 this = table_sample 2542 2543 if joins: 2544 for join in iter(self._parse_join, None): 2545 this.append("joins", join) 2546 2547 return this 2548 2549 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2550 if not self._match(TokenType.UNNEST): 2551 
return None 2552 2553 expressions = self._parse_wrapped_csv(self._parse_type) 2554 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2555 2556 alias = self._parse_table_alias() if with_alias else None 2557 2558 if alias and self.UNNEST_COLUMN_ONLY: 2559 if alias.args.get("columns"): 2560 self.raise_error("Unexpected extra column alias in unnest.") 2561 2562 alias.set("columns", [alias.this]) 2563 alias.set("this", None) 2564 2565 offset = None 2566 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2567 self._match(TokenType.ALIAS) 2568 offset = self._parse_id_var() or exp.to_identifier("offset") 2569 2570 return self.expression( 2571 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2572 ) 2573 2574 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2575 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2576 if not is_derived and not self._match(TokenType.VALUES): 2577 return None 2578 2579 expressions = self._parse_csv(self._parse_value) 2580 alias = self._parse_table_alias() 2581 2582 if is_derived: 2583 self._match_r_paren() 2584 2585 return self.expression( 2586 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2587 ) 2588 2589 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2590 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2591 as_modifier and self._match_text_seq("USING", "SAMPLE") 2592 ): 2593 return None 2594 2595 bucket_numerator = None 2596 bucket_denominator = None 2597 bucket_field = None 2598 percent = None 2599 rows = None 2600 size = None 2601 seed = None 2602 2603 kind = ( 2604 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2605 ) 2606 method = self._parse_var(tokens=(TokenType.ROW,)) 2607 2608 self._match(TokenType.L_PAREN) 2609 2610 num = self._parse_number() 2611 2612 if self._match_text_seq("BUCKET"): 2613 bucket_numerator = self._parse_number() 2614 self._match_text_seq("OUT", "OF") 2615 bucket_denominator = self._parse_number() 2616 self._match(TokenType.ON) 2617 bucket_field = self._parse_field() 2618 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2619 percent = num 2620 elif self._match(TokenType.ROWS): 2621 rows = num 2622 else: 2623 size = num 2624 2625 self._match(TokenType.R_PAREN) 2626 2627 if self._match(TokenType.L_PAREN): 2628 method = self._parse_var() 2629 seed = self._match(TokenType.COMMA) and self._parse_number() 2630 self._match_r_paren() 2631 elif self._match_texts(("SEED", "REPEATABLE")): 2632 seed = self._parse_wrapped(self._parse_number) 2633 2634 return self.expression( 2635 exp.TableSample, 2636 method=method, 2637 bucket_numerator=bucket_numerator, 2638 bucket_denominator=bucket_denominator, 2639 bucket_field=bucket_field, 2640 percent=percent, 2641 rows=rows, 2642 size=size, 2643 seed=seed, 2644 kind=kind, 2645 ) 2646 2647 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2648 return list(iter(self._parse_pivot, None)) or None 2649 2650 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2651 return list(iter(self._parse_join, None)) or None 2652 2653 # https://duckdb.org/docs/sql/statements/pivot 2654 def _parse_simplified_pivot(self) -> exp.Pivot: 2655 def _parse_on() -> t.Optional[exp.Expression]: 2656 this = self._parse_bitwise() 2657 return self._parse_in(this) if self._match(TokenType.IN) else this 2658 2659 this = self._parse_table() 2660 expressions = self._match(TokenType.ON) and
self._parse_csv(_parse_on) 2661 using = self._match(TokenType.USING) and self._parse_csv( 2662 lambda: self._parse_alias(self._parse_function()) 2663 ) 2664 group = self._parse_group() 2665 return self.expression( 2666 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2667 ) 2668 2669 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2670 index = self._index 2671 include_nulls = None 2672 2673 if self._match(TokenType.PIVOT): 2674 unpivot = False 2675 elif self._match(TokenType.UNPIVOT): 2676 unpivot = True 2677 2678 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2679 if self._match_text_seq("INCLUDE", "NULLS"): 2680 include_nulls = True 2681 elif self._match_text_seq("EXCLUDE", "NULLS"): 2682 include_nulls = False 2683 else: 2684 return None 2685 2686 expressions = [] 2687 field = None 2688 2689 if not self._match(TokenType.L_PAREN): 2690 self._retreat(index) 2691 return None 2692 2693 if unpivot: 2694 expressions = self._parse_csv(self._parse_column) 2695 else: 2696 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2697 2698 if not expressions: 2699 self.raise_error("Failed to parse PIVOT's aggregation list") 2700 2701 if not self._match(TokenType.FOR): 2702 self.raise_error("Expecting FOR") 2703 2704 value = self._parse_column() 2705 2706 if not self._match(TokenType.IN): 2707 self.raise_error("Expecting IN") 2708 2709 field = self._parse_in(value, alias=True) 2710 2711 self._match_r_paren() 2712 2713 pivot = self.expression( 2714 exp.Pivot, 2715 expressions=expressions, 2716 field=field, 2717 unpivot=unpivot, 2718 include_nulls=include_nulls, 2719 ) 2720 2721 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2722 pivot.set("alias", self._parse_table_alias()) 2723 2724 if not unpivot: 2725 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2726 2727 columns: t.List[exp.Expression] = [] 2728 for fld in pivot.args["field"].expressions: 2729 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2730 for name in names: 2731 if self.PREFIXED_PIVOT_COLUMNS: 2732 name = f"{name}_{field_name}" if name else field_name 2733 else: 2734 name = f"{field_name}_{name}" if name else field_name 2735 2736 columns.append(exp.to_identifier(name)) 2737 2738 pivot.set("columns", columns) 2739 2740 return pivot 2741 2742 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2743 return [agg.alias for agg in aggregations] 2744 2745 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2746 if not skip_where_token and not self._match(TokenType.WHERE): 2747 return None 2748 2749 return self.expression( 2750 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2751 ) 2752 2753 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2754 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2755 return None 2756 2757 elements = defaultdict(list) 2758 2759 if self._match(TokenType.ALL): 2760 return self.expression(exp.Group, all=True) 2761 2762 while True: 2763 expressions = self._parse_csv(self._parse_conjunction) 2764 if expressions: 2765 elements["expressions"].extend(expressions) 2766 2767 grouping_sets = self._parse_grouping_sets() 2768 if grouping_sets: 2769 elements["grouping_sets"].extend(grouping_sets) 2770 2771 rollup = None 2772 cube = None 2773 totals = None 2774 2775 with_ = self._match(TokenType.WITH) 2776 
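# WITH ROLLUP / WITH CUBE (e.g. MySQL) take no column list, so a matched `with_` short-circuits the wrapped column parse below and leaves rollup/cube as bare flags.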
if self._match(TokenType.ROLLUP): 2777 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2778 elements["rollup"].extend(ensure_list(rollup)) 2779 2780 if self._match(TokenType.CUBE): 2781 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2782 elements["cube"].extend(ensure_list(cube)) 2783 2784 if self._match_text_seq("TOTALS"): 2785 totals = True 2786 elements["totals"] = True # type: ignore 2787 2788 if not (grouping_sets or rollup or cube or totals): 2789 break 2790 2791 return self.expression(exp.Group, **elements) # type: ignore 2792 2793 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2794 if not self._match(TokenType.GROUPING_SETS): 2795 return None 2796 2797 return self._parse_wrapped_csv(self._parse_grouping_set) 2798 2799 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2800 if self._match(TokenType.L_PAREN): 2801 grouping_set = self._parse_csv(self._parse_column) 2802 self._match_r_paren() 2803 return self.expression(exp.Tuple, expressions=grouping_set) 2804 2805 return self._parse_column() 2806 2807 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2808 if not skip_having_token and not self._match(TokenType.HAVING): 2809 return None 2810 return self.expression(exp.Having, this=self._parse_conjunction()) 2811 2812 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2813 if not self._match(TokenType.QUALIFY): 2814 return None 2815 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2816 2817 def _parse_order( 2818 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2819 ) -> t.Optional[exp.Expression]: 2820 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2821 return this 2822 2823 return self.expression( 2824 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2825 ) 2826 2827 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2828 if not self._match(token): 2829 return None 2830 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2831 2832 def _parse_ordered(self) -> exp.Ordered: 2833 this = self._parse_conjunction() 2834 self._match(TokenType.ASC) 2835 2836 is_desc = self._match(TokenType.DESC) 2837 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2838 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2839 desc = is_desc or False 2840 asc = not desc 2841 nulls_first = is_nulls_first or False 2842 explicitly_null_ordered = is_nulls_first or is_nulls_last 2843 2844 if ( 2845 not explicitly_null_ordered 2846 and ( 2847 (asc and self.NULL_ORDERING == "nulls_are_small") 2848 or (desc and self.NULL_ORDERING != "nulls_are_small") 2849 ) 2850 and self.NULL_ORDERING != "nulls_are_last" 2851 ): 2852 nulls_first = True 2853 2854 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2855 2856 def _parse_limit( 2857 self, this: t.Optional[exp.Expression] = None, top: bool = False 2858 ) -> t.Optional[exp.Expression]: 2859 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2860 comments = self._prev_comments 2861 if top: 2862 limit_paren = self._match(TokenType.L_PAREN) 2863 expression = self._parse_number() 2864 2865 if limit_paren: 2866 self._match_r_paren() 2867 else: 2868 expression = self._parse_term() 2869 2870 if self._match(TokenType.COMMA): 2871 offset = expression 2872 expression = self._parse_term() 2873 else: 2874 offset = None 2875 2876 limit_exp = self.expression( 2877 exp.Limit, this=this, 
expression=expression, offset=offset, comments=comments 2878 ) 2879 2880 return limit_exp 2881 2882 if self._match(TokenType.FETCH): 2883 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2884 direction = self._prev.text if direction else "FIRST" 2885 2886 count = self._parse_number() 2887 percent = self._match(TokenType.PERCENT) 2888 2889 self._match_set((TokenType.ROW, TokenType.ROWS)) 2890 2891 only = self._match_text_seq("ONLY") 2892 with_ties = self._match_text_seq("WITH", "TIES") 2893 2894 if only and with_ties: 2895 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2896 2897 return self.expression( 2898 exp.Fetch, 2899 direction=direction, 2900 count=count, 2901 percent=percent, 2902 with_ties=with_ties, 2903 ) 2904 2905 return this 2906 2907 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2908 if not self._match(TokenType.OFFSET): 2909 return this 2910 2911 count = self._parse_term() 2912 self._match_set((TokenType.ROW, TokenType.ROWS)) 2913 return self.expression(exp.Offset, this=this, expression=count) 2914 2915 def _parse_locks(self) -> t.List[exp.Lock]: 2916 locks = [] 2917 while True: 2918 if self._match_text_seq("FOR", "UPDATE"): 2919 update = True 2920 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2921 "LOCK", "IN", "SHARE", "MODE" 2922 ): 2923 update = False 2924 else: 2925 break 2926 2927 expressions = None 2928 if self._match_text_seq("OF"): 2929 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2930 2931 wait: t.Optional[bool | exp.Expression] = None 2932 if self._match_text_seq("NOWAIT"): 2933 wait = True 2934 elif self._match_text_seq("WAIT"): 2935 wait = self._parse_primary() 2936 elif self._match_text_seq("SKIP", "LOCKED"): 2937 wait = False 2938 2939 locks.append( 2940 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2941 ) 2942 2943 return locks 2944 2945 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2946 if not self._match_set(self.SET_OPERATIONS): 2947 return this 2948 2949 token_type = self._prev.token_type 2950 2951 if token_type == TokenType.UNION: 2952 expression = exp.Union 2953 elif token_type == TokenType.EXCEPT: 2954 expression = exp.Except 2955 else: 2956 expression = exp.Intersect 2957 2958 return self.expression( 2959 expression, 2960 this=this, 2961 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2962 expression=self._parse_set_operations(self._parse_select(nested=True)), 2963 ) 2964 2965 def _parse_expression(self) -> t.Optional[exp.Expression]: 2966 return self._parse_alias(self._parse_conjunction()) 2967 2968 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2969 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2970 2971 def _parse_equality(self) -> t.Optional[exp.Expression]: 2972 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2973 2974 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2975 return self._parse_tokens(self._parse_range, self.COMPARISON) 2976 2977 def _parse_range(self) -> t.Optional[exp.Expression]: 2978 this = self._parse_bitwise() 2979 negate = self._match(TokenType.NOT) 2980 2981 if self._match_set(self.RANGE_PARSERS): 2982 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2983 if not expression: 2984 return this 2985 2986 this = expression 2987 elif self._match(TokenType.ISNULL): 2988 this = self.expression(exp.Is, this=this, 
expression=exp.Null()) 2989 2990 # Postgres supports ISNULL and NOTNULL for conditions. 2991 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2992 if self._match(TokenType.NOTNULL): 2993 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2994 this = self.expression(exp.Not, this=this) 2995 2996 if negate: 2997 this = self.expression(exp.Not, this=this) 2998 2999 if self._match(TokenType.IS): 3000 this = self._parse_is(this) 3001 3002 return this 3003 3004 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3005 index = self._index - 1 3006 negate = self._match(TokenType.NOT) 3007 3008 if self._match_text_seq("DISTINCT", "FROM"): 3009 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3010 return self.expression(klass, this=this, expression=self._parse_expression()) 3011 3012 expression = self._parse_null() or self._parse_boolean() 3013 if not expression: 3014 self._retreat(index) 3015 return None 3016 3017 this = self.expression(exp.Is, this=this, expression=expression) 3018 return self.expression(exp.Not, this=this) if negate else this 3019 3020 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3021 unnest = self._parse_unnest(with_alias=False) 3022 if unnest: 3023 this = self.expression(exp.In, this=this, unnest=unnest) 3024 elif self._match(TokenType.L_PAREN): 3025 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3026 3027 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3028 this = self.expression(exp.In, this=this, query=expressions[0]) 3029 else: 3030 this = self.expression(exp.In, this=this, expressions=expressions) 3031 3032 self._match_r_paren(this) 3033 else: 3034 this = self.expression(exp.In, this=this, field=self._parse_field()) 3035 3036 return this 3037 3038 def _parse_between(self, this: exp.Expression) -> exp.Between: 3039 low = self._parse_bitwise() 3040 self._match(TokenType.AND) 3041 high = self._parse_bitwise() 3042 return self.expression(exp.Between, this=this, low=low, high=high) 3043 3044 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3045 if not self._match(TokenType.ESCAPE): 3046 return this 3047 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3048 3049 def _parse_interval(self) -> t.Optional[exp.Interval]: 3050 index = self._index 3051 3052 if not self._match(TokenType.INTERVAL): 3053 return None 3054 3055 if self._match(TokenType.STRING, advance=False): 3056 this = self._parse_primary() 3057 else: 3058 this = self._parse_term() 3059 3060 if not this: 3061 self._retreat(index) 3062 return None 3063 3064 unit = self._parse_function() or self._parse_var() 3065 3066 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3067 # each INTERVAL expression into this canonical form so it's easy to transpile 3068 if this and this.is_number: 3069 this = exp.Literal.string(this.name) 3070 elif this and this.is_string: 3071 parts = this.name.split() 3072 3073 if len(parts) == 2: 3074 if unit: 3075 # this is not actually a unit, it's something else 3076 unit = None 3077 self._retreat(self._index - 1) 3078 else: 3079 this = exp.Literal.string(parts[0]) 3080 unit = self.expression(exp.Var, this=parts[1]) 3081 3082 return self.expression(exp.Interval, this=this, unit=unit) 3083 3084 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3085 this = self._parse_term() 3086 3087 while True: 3088 if 
self._match_set(self.BITWISE): 3089 this = self.expression( 3090 self.BITWISE[self._prev.token_type], 3091 this=this, 3092 expression=self._parse_term(), 3093 ) 3094 elif self._match(TokenType.DQMARK): 3095 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3096 elif self._match_pair(TokenType.LT, TokenType.LT): 3097 this = self.expression( 3098 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3099 ) 3100 elif self._match_pair(TokenType.GT, TokenType.GT): 3101 this = self.expression( 3102 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3103 ) 3104 else: 3105 break 3106 3107 return this 3108 3109 def _parse_term(self) -> t.Optional[exp.Expression]: 3110 return self._parse_tokens(self._parse_factor, self.TERM) 3111 3112 def _parse_factor(self) -> t.Optional[exp.Expression]: 3113 return self._parse_tokens(self._parse_unary, self.FACTOR) 3114 3115 def _parse_unary(self) -> t.Optional[exp.Expression]: 3116 if self._match_set(self.UNARY_PARSERS): 3117 return self.UNARY_PARSERS[self._prev.token_type](self) 3118 return self._parse_at_time_zone(self._parse_type()) 3119 3120 def _parse_type(self) -> t.Optional[exp.Expression]: 3121 interval = self._parse_interval() 3122 if interval: 3123 return interval 3124 3125 index = self._index 3126 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3127 this = self._parse_column() 3128 3129 if data_type: 3130 if isinstance(this, exp.Literal): 3131 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3132 if parser: 3133 return parser(self, this, data_type) 3134 return self.expression(exp.Cast, this=this, to=data_type) 3135 if not data_type.expressions: 3136 self._retreat(index) 3137 return self._parse_column() 3138 return self._parse_column_ops(data_type) 3139 3140 return this 3141 3142 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3143 this = self._parse_type() 3144 if not this: 3145 return None 3146 3147 return self.expression( 3148 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3149 ) 3150 3151 def _parse_types( 3152 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3153 ) -> t.Optional[exp.Expression]: 3154 index = self._index 3155 3156 prefix = self._match_text_seq("SYSUDTLIB", ".") 3157 3158 if not self._match_set(self.TYPE_TOKENS): 3159 identifier = allow_identifiers and self._parse_id_var( 3160 any_token=False, tokens=(TokenType.VAR,) 3161 ) 3162 3163 if identifier: 3164 tokens = self._tokenizer.tokenize(identifier.name) 3165 3166 if len(tokens) != 1: 3167 self.raise_error("Unexpected identifier", self._prev) 3168 3169 if tokens[0].token_type in self.TYPE_TOKENS: 3170 self._prev = tokens[0] 3171 elif self.SUPPORTS_USER_DEFINED_TYPES: 3172 return identifier 3173 else: 3174 return None 3175 else: 3176 return None 3177 3178 type_token = self._prev.token_type 3179 3180 if type_token == TokenType.PSEUDO_TYPE: 3181 return self.expression(exp.PseudoType, this=self._prev.text) 3182 3183 nested = type_token in self.NESTED_TYPE_TOKENS 3184 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3185 expressions = None 3186 maybe_func = False 3187 3188 if self._match(TokenType.L_PAREN): 3189 if is_struct: 3190 expressions = self._parse_csv(self._parse_struct_types) 3191 elif nested: 3192 expressions = self._parse_csv( 3193 lambda: self._parse_types( 3194 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3195 ) 3196 ) 3197 elif type_token in self.ENUM_TYPE_TOKENS: 3198 expressions = 
self._parse_csv(self._parse_equality) 3199 else: 3200 expressions = self._parse_csv(self._parse_type_size) 3201 3202 if not expressions or not self._match(TokenType.R_PAREN): 3203 self._retreat(index) 3204 return None 3205 3206 maybe_func = True 3207 3208 this: t.Optional[exp.Expression] = None 3209 values: t.Optional[t.List[exp.Expression]] = None 3210 3211 if nested and self._match(TokenType.LT): 3212 if is_struct: 3213 expressions = self._parse_csv(self._parse_struct_types) 3214 else: 3215 expressions = self._parse_csv( 3216 lambda: self._parse_types( 3217 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3218 ) 3219 ) 3220 3221 if not self._match(TokenType.GT): 3222 self.raise_error("Expecting >") 3223 3224 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3225 values = self._parse_csv(self._parse_conjunction) 3226 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3227 3228 if type_token in self.TIMESTAMPS: 3229 if self._match_text_seq("WITH", "TIME", "ZONE"): 3230 maybe_func = False 3231 tz_type = ( 3232 exp.DataType.Type.TIMETZ 3233 if type_token in self.TIMES 3234 else exp.DataType.Type.TIMESTAMPTZ 3235 ) 3236 this = exp.DataType(this=tz_type, expressions=expressions) 3237 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3238 maybe_func = False 3239 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3240 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3241 maybe_func = False 3242 elif type_token == TokenType.INTERVAL: 3243 if self._match_text_seq("YEAR", "TO", "MONTH"): 3244 span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()] 3245 elif self._match_text_seq("DAY", "TO", "SECOND"): 3246 span = [exp.IntervalDayToSecondSpan()] 3247 else: 3248 span = None 3249 3250 unit = not span and self._parse_var() 3251 if not unit: 3252 this = self.expression( 3253 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3254 ) 3255 else: 3256 this = self.expression(exp.Interval, unit=unit) 3257 3258 if maybe_func and check_func: 3259 index2 = self._index 3260 peek = self._parse_string() 3261 3262 if not peek: 3263 self._retreat(index) 3264 return None 3265 3266 self._retreat(index2) 3267 3268 if not this: 3269 this = exp.DataType( 3270 this=exp.DataType.Type[type_token.value], 3271 expressions=expressions, 3272 nested=nested, 3273 values=values, 3274 prefix=prefix, 3275 ) 3276 3277 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3278 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3279 3280 return this 3281 3282 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3283 this = self._parse_type() or self._parse_id_var() 3284 self._match(TokenType.COLON) 3285 return self._parse_column_def(this) 3286 3287 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3288 if not self._match_text_seq("AT", "TIME", "ZONE"): 3289 return this 3290 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3291 3292 def _parse_column(self) -> t.Optional[exp.Expression]: 3293 this = self._parse_field() 3294 if isinstance(this, exp.Identifier): 3295 this = self.expression(exp.Column, this=this) 3296 elif not this: 3297 return self._parse_bracket(this) 3298 return self._parse_column_ops(this) 3299 3300 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3301 this = self._parse_bracket(this) 3302 3303 while 
self._match_set(self.COLUMN_OPERATORS): 3304 op_token = self._prev.token_type 3305 op = self.COLUMN_OPERATORS.get(op_token) 3306 3307 if op_token == TokenType.DCOLON: 3308 field = self._parse_types() 3309 if not field: 3310 self.raise_error("Expected type") 3311 elif op and self._curr: 3312 self._advance() 3313 value = self._prev.text 3314 field = ( 3315 exp.Literal.number(value) 3316 if self._prev.token_type == TokenType.NUMBER 3317 else exp.Literal.string(value) 3318 ) 3319 else: 3320 field = self._parse_field(anonymous_func=True, any_token=True) 3321 3322 if isinstance(field, exp.Func): 3323 # bigquery allows function calls like x.y.count(...) 3324 # SAFE.SUBSTR(...) 3325 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3326 this = self._replace_columns_with_dots(this) 3327 3328 if op: 3329 this = op(self, this, field) 3330 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3331 this = self.expression( 3332 exp.Column, 3333 this=field, 3334 table=this.this, 3335 db=this.args.get("table"), 3336 catalog=this.args.get("db"), 3337 ) 3338 else: 3339 this = self.expression(exp.Dot, this=this, expression=field) 3340 this = self._parse_bracket(this) 3341 return this 3342 3343 def _parse_primary(self) -> t.Optional[exp.Expression]: 3344 if self._match_set(self.PRIMARY_PARSERS): 3345 token_type = self._prev.token_type 3346 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3347 3348 if token_type == TokenType.STRING: 3349 expressions = [primary] 3350 while self._match(TokenType.STRING): 3351 expressions.append(exp.Literal.string(self._prev.text)) 3352 3353 if len(expressions) > 1: 3354 return self.expression(exp.Concat, expressions=expressions) 3355 3356 return primary 3357 3358 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3359 return exp.Literal.number(f"0.{self._prev.text}") 3360 3361 if self._match(TokenType.L_PAREN): 3362 comments = self._prev_comments 3363 query = self._parse_select() 3364 3365 if query: 3366 expressions = [query] 3367 else: 3368 expressions = self._parse_expressions() 3369 3370 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3371 3372 if isinstance(this, exp.Subqueryable): 3373 this = self._parse_set_operations( 3374 self._parse_subquery(this=this, parse_alias=False) 3375 ) 3376 elif len(expressions) > 1: 3377 this = self.expression(exp.Tuple, expressions=expressions) 3378 else: 3379 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3380 3381 if this: 3382 this.add_comments(comments) 3383 3384 self._match_r_paren(expression=this) 3385 return this 3386 3387 return None 3388 3389 def _parse_field( 3390 self, 3391 any_token: bool = False, 3392 tokens: t.Optional[t.Collection[TokenType]] = None, 3393 anonymous_func: bool = False, 3394 ) -> t.Optional[exp.Expression]: 3395 return ( 3396 self._parse_primary() 3397 or self._parse_function(anonymous=anonymous_func) 3398 or self._parse_id_var(any_token=any_token, tokens=tokens) 3399 ) 3400 3401 def _parse_function( 3402 self, 3403 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3404 anonymous: bool = False, 3405 optional_parens: bool = True, 3406 ) -> t.Optional[exp.Expression]: 3407 if not self._curr: 3408 return None 3409 3410 token_type = self._curr.token_type 3411 this = self._curr.text 3412 upper = this.upper() 3413 3414 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3415 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3416 self._advance() 3417 return 
parser(self) 3418 3419 if not self._next or self._next.token_type != TokenType.L_PAREN: 3420 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3421 self._advance() 3422 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3423 3424 return None 3425 3426 if token_type not in self.FUNC_TOKENS: 3427 return None 3428 3429 self._advance(2) 3430 3431 parser = self.FUNCTION_PARSERS.get(upper) 3432 if parser and not anonymous: 3433 this = parser(self) 3434 else: 3435 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3436 3437 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3438 this = self.expression(subquery_predicate, this=self._parse_select()) 3439 self._match_r_paren() 3440 return this 3441 3442 if functions is None: 3443 functions = self.FUNCTIONS 3444 3445 function = functions.get(upper) 3446 3447 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3448 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3449 3450 if function and not anonymous: 3451 func = self.validate_expression(function(args), args) 3452 if not self.NORMALIZE_FUNCTIONS: 3453 func.meta["name"] = this 3454 this = func 3455 else: 3456 this = self.expression(exp.Anonymous, this=this, expressions=args) 3457 3458 self._match_r_paren(this) 3459 return self._parse_window(this) 3460 3461 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3462 return self._parse_column_def(self._parse_id_var()) 3463 3464 def _parse_user_defined_function( 3465 self, kind: t.Optional[TokenType] = None 3466 ) -> t.Optional[exp.Expression]: 3467 this = self._parse_id_var() 3468 3469 while self._match(TokenType.DOT): 3470 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3471 3472 if not self._match(TokenType.L_PAREN): 3473 return this 3474 3475 expressions = self._parse_csv(self._parse_function_parameter) 3476 self._match_r_paren() 3477 return self.expression( 3478 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3479 ) 3480 3481 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3482 literal = self._parse_primary() 3483 if literal: 3484 return self.expression(exp.Introducer, this=token.text, expression=literal) 3485 3486 return self.expression(exp.Identifier, this=token.text) 3487 3488 def _parse_session_parameter(self) -> exp.SessionParameter: 3489 kind = None 3490 this = self._parse_id_var() or self._parse_primary() 3491 3492 if this and self._match(TokenType.DOT): 3493 kind = this.name 3494 this = self._parse_var() or self._parse_primary() 3495 3496 return self.expression(exp.SessionParameter, this=this, kind=kind) 3497 3498 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3499 index = self._index 3500 3501 if self._match(TokenType.L_PAREN): 3502 expressions = t.cast( 3503 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3504 ) 3505 3506 if not self._match(TokenType.R_PAREN): 3507 self._retreat(index) 3508 else: 3509 expressions = [self._parse_id_var()] 3510 3511 if self._match_set(self.LAMBDAS): 3512 return self.LAMBDAS[self._prev.token_type](self, expressions) 3513 3514 self._retreat(index) 3515 3516 this: t.Optional[exp.Expression] 3517 3518 if self._match(TokenType.DISTINCT): 3519 this = self.expression( 3520 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3521 ) 3522 else: 3523 this = self._parse_select_or_expression(alias=alias) 3524 3525 return 
self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3526 3527 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3528 index = self._index 3529 3530 if not self.errors: 3531 try: 3532 if self._parse_select(nested=True): 3533 return this 3534 except ParseError: 3535 pass 3536 finally: 3537 self.errors.clear() 3538 self._retreat(index) 3539 3540 if not self._match(TokenType.L_PAREN): 3541 return this 3542 3543 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3544 3545 self._match_r_paren() 3546 return self.expression(exp.Schema, this=this, expressions=args) 3547 3548 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3549 return self._parse_column_def(self._parse_field(any_token=True)) 3550 3551 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3552 # column defs are not really columns, they're identifiers 3553 if isinstance(this, exp.Column): 3554 this = this.this 3555 3556 kind = self._parse_types(schema=True) 3557 3558 if self._match_text_seq("FOR", "ORDINALITY"): 3559 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3560 3561 constraints: t.List[exp.Expression] = [] 3562 3563 if not kind and self._match(TokenType.ALIAS): 3564 constraints.append( 3565 self.expression( 3566 exp.ComputedColumnConstraint, 3567 this=self._parse_conjunction(), 3568 persisted=self._match_text_seq("PERSISTED"), 3569 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3570 ) 3571 ) 3572 3573 while True: 3574 constraint = self._parse_column_constraint() 3575 if not constraint: 3576 break 3577 constraints.append(constraint) 3578 3579 if not kind and not constraints: 3580 return this 3581 3582 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3583 3584 def _parse_auto_increment( 3585 self, 3586 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3587 start = None 3588 increment = None 3589 3590 if self._match(TokenType.L_PAREN, advance=False): 3591 args = self._parse_wrapped_csv(self._parse_bitwise) 3592 start = seq_get(args, 0) 3593 increment = seq_get(args, 1) 3594 elif self._match_text_seq("START"): 3595 start = self._parse_bitwise() 3596 self._match_text_seq("INCREMENT") 3597 increment = self._parse_bitwise() 3598 3599 if start and increment: 3600 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3601 3602 return exp.AutoIncrementColumnConstraint() 3603 3604 def _parse_compress(self) -> exp.CompressColumnConstraint: 3605 if self._match(TokenType.L_PAREN, advance=False): 3606 return self.expression( 3607 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3608 ) 3609 3610 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3611 3612 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3613 if self._match_text_seq("BY", "DEFAULT"): 3614 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3615 this = self.expression( 3616 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3617 ) 3618 else: 3619 self._match_text_seq("ALWAYS") 3620 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3621 3622 self._match(TokenType.ALIAS) 3623 identity = self._match_text_seq("IDENTITY") 3624 3625 if self._match(TokenType.L_PAREN): 3626 if self._match_text_seq("START", "WITH"): 3627 this.set("start", self._parse_bitwise()) 3628 if 
self._match_text_seq("INCREMENT", "BY"): 3629 this.set("increment", self._parse_bitwise()) 3630 if self._match_text_seq("MINVALUE"): 3631 this.set("minvalue", self._parse_bitwise()) 3632 if self._match_text_seq("MAXVALUE"): 3633 this.set("maxvalue", self._parse_bitwise()) 3634 3635 if self._match_text_seq("CYCLE"): 3636 this.set("cycle", True) 3637 elif self._match_text_seq("NO", "CYCLE"): 3638 this.set("cycle", False) 3639 3640 if not identity: 3641 this.set("expression", self._parse_bitwise()) 3642 3643 self._match_r_paren() 3644 3645 return this 3646 3647 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3648 self._match_text_seq("LENGTH") 3649 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3650 3651 def _parse_not_constraint( 3652 self, 3653 ) -> t.Optional[exp.Expression]: 3654 if self._match_text_seq("NULL"): 3655 return self.expression(exp.NotNullColumnConstraint) 3656 if self._match_text_seq("CASESPECIFIC"): 3657 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3658 if self._match_text_seq("FOR", "REPLICATION"): 3659 return self.expression(exp.NotForReplicationColumnConstraint) 3660 return None 3661 3662 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3663 if self._match(TokenType.CONSTRAINT): 3664 this = self._parse_id_var() 3665 else: 3666 this = None 3667 3668 if self._match_texts(self.CONSTRAINT_PARSERS): 3669 return self.expression( 3670 exp.ColumnConstraint, 3671 this=this, 3672 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3673 ) 3674 3675 return this 3676 3677 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3678 if not self._match(TokenType.CONSTRAINT): 3679 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3680 3681 this = self._parse_id_var() 3682 expressions = [] 3683 3684 while True: 3685 constraint = self._parse_unnamed_constraint() or self._parse_function() 3686 if not constraint: 3687 break 3688 expressions.append(constraint) 3689 3690 return self.expression(exp.Constraint, this=this, expressions=expressions) 3691 3692 def _parse_unnamed_constraint( 3693 self, constraints: t.Optional[t.Collection[str]] = None 3694 ) -> t.Optional[exp.Expression]: 3695 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3696 return None 3697 3698 constraint = self._prev.text.upper() 3699 if constraint not in self.CONSTRAINT_PARSERS: 3700 self.raise_error(f"No parser found for schema constraint {constraint}.") 3701 3702 return self.CONSTRAINT_PARSERS[constraint](self) 3703 3704 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3705 self._match_text_seq("KEY") 3706 return self.expression( 3707 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3708 ) 3709 3710 def _parse_key_constraint_options(self) -> t.List[str]: 3711 options = [] 3712 while True: 3713 if not self._curr: 3714 break 3715 3716 if self._match(TokenType.ON): 3717 action = None 3718 on = self._advance_any() and self._prev.text 3719 3720 if self._match_text_seq("NO", "ACTION"): 3721 action = "NO ACTION" 3722 elif self._match_text_seq("CASCADE"): 3723 action = "CASCADE" 3724 elif self._match_pair(TokenType.SET, TokenType.NULL): 3725 action = "SET NULL" 3726 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3727 action = "SET DEFAULT" 3728 else: 3729 self.raise_error("Invalid key constraint") 3730 3731 options.append(f"ON {on} {action}") 3732 elif self._match_text_seq("NOT", "ENFORCED"): 3733 options.append("NOT 
ENFORCED") 3734 elif self._match_text_seq("DEFERRABLE"): 3735 options.append("DEFERRABLE") 3736 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3737 options.append("INITIALLY DEFERRED") 3738 elif self._match_text_seq("NORELY"): 3739 options.append("NORELY") 3740 elif self._match_text_seq("MATCH", "FULL"): 3741 options.append("MATCH FULL") 3742 else: 3743 break 3744 3745 return options 3746 3747 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3748 if match and not self._match(TokenType.REFERENCES): 3749 return None 3750 3751 expressions = None 3752 this = self._parse_table(schema=True) 3753 options = self._parse_key_constraint_options() 3754 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3755 3756 def _parse_foreign_key(self) -> exp.ForeignKey: 3757 expressions = self._parse_wrapped_id_vars() 3758 reference = self._parse_references() 3759 options = {} 3760 3761 while self._match(TokenType.ON): 3762 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3763 self.raise_error("Expected DELETE or UPDATE") 3764 3765 kind = self._prev.text.lower() 3766 3767 if self._match_text_seq("NO", "ACTION"): 3768 action = "NO ACTION" 3769 elif self._match(TokenType.SET): 3770 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3771 action = "SET " + self._prev.text.upper() 3772 else: 3773 self._advance() 3774 action = self._prev.text.upper() 3775 3776 options[kind] = action 3777 3778 return self.expression( 3779 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3780 ) 3781 3782 def _parse_primary_key( 3783 self, wrapped_optional: bool = False, in_props: bool = False 3784 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3785 desc = ( 3786 self._match_set((TokenType.ASC, TokenType.DESC)) 3787 and self._prev.token_type == TokenType.DESC 3788 ) 3789 3790 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3791 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3792 3793 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3794 options = self._parse_key_constraint_options() 3795 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3796 3797 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3798 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3799 return this 3800 3801 bracket_kind = self._prev.token_type 3802 3803 if self._match(TokenType.COLON): 3804 expressions: t.List[exp.Expression] = [ 3805 self.expression(exp.Slice, expression=self._parse_conjunction()) 3806 ] 3807 else: 3808 expressions = self._parse_csv( 3809 lambda: self._parse_slice( 3810 self._parse_alias(self._parse_conjunction(), explicit=True) 3811 ) 3812 ) 3813 3814 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3815 if bracket_kind == TokenType.L_BRACE: 3816 this = self.expression(exp.Struct, expressions=expressions) 3817 elif not this or this.name.upper() == "ARRAY": 3818 this = self.expression(exp.Array, expressions=expressions) 3819 else: 3820 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3821 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3822 3823 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3824 self.raise_error("Expected ]") 3825 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3826 self.raise_error("Expected }") 3827 3828 
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)
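
    # Illustrative sketch (not part of the original source): _parse_bracket above
    # normalizes array subscripts through apply_index_offset, so a 1-based dialect
    # such as Presto lands on the 0-based internal representation, and DuckDB's brace
    # syntax becomes an exp.Struct:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT a[1]", read="presto").sql()
    #   'SELECT a[0]'
    #   >>> isinstance(sqlglot.parse_one("SELECT {'x': 1}", read="duckdb").find(exp.Struct), exp.Struct)
    #   True
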
    def _parse_cast(self, strict: bool) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )
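
    # Illustrative sketch (not part of the original source): because some engines
    # reject single-argument CONCAT, _parse_concat above collapses that call to its
    # bare argument at parse time:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT CONCAT(a)")[0]
    #   'SELECT a'
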
    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)
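
    # Illustrative sketch (not part of the original source): the search/result variant
    # of DECODE described in the docstring above is expanded into an equivalent CASE
    # expression at parse time, with NULL handled via IS NULL:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT DECODE(x, 1, 'one', NULL, 'empty', 'other') FROM t", read="oracle")[0]
    #   "SELECT CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'empty' ELSE 'other' END FROM t"
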
"JSON") 4095 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4096 4097 return self.expression( 4098 exp.JSONObject, 4099 expressions=expressions, 4100 null_handling=null_handling, 4101 unique_keys=unique_keys, 4102 return_type=return_type, 4103 format_json=format_json, 4104 encoding=encoding, 4105 ) 4106 4107 def _parse_logarithm(self) -> exp.Func: 4108 # Default argument order is base, expression 4109 args = self._parse_csv(self._parse_range) 4110 4111 if len(args) > 1: 4112 if not self.LOG_BASE_FIRST: 4113 args.reverse() 4114 return exp.Log.from_arg_list(args) 4115 4116 return self.expression( 4117 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4118 ) 4119 4120 def _parse_match_against(self) -> exp.MatchAgainst: 4121 expressions = self._parse_csv(self._parse_column) 4122 4123 self._match_text_seq(")", "AGAINST", "(") 4124 4125 this = self._parse_string() 4126 4127 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4128 modifier = "IN NATURAL LANGUAGE MODE" 4129 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4130 modifier = f"{modifier} WITH QUERY EXPANSION" 4131 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4132 modifier = "IN BOOLEAN MODE" 4133 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4134 modifier = "WITH QUERY EXPANSION" 4135 else: 4136 modifier = None 4137 4138 return self.expression( 4139 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4140 ) 4141 4142 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4143 def _parse_open_json(self) -> exp.OpenJSON: 4144 this = self._parse_bitwise() 4145 path = self._match(TokenType.COMMA) and self._parse_string() 4146 4147 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4148 this = self._parse_field(any_token=True) 4149 kind = self._parse_types() 4150 path = self._parse_string() 4151 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4152 4153 return self.expression( 4154 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4155 ) 4156 4157 expressions = None 4158 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4159 self._match_l_paren() 4160 expressions = self._parse_csv(_parse_open_json_column_def) 4161 4162 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4163 4164 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4165 args = self._parse_csv(self._parse_bitwise) 4166 4167 if self._match(TokenType.IN): 4168 return self.expression( 4169 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4170 ) 4171 4172 if haystack_first: 4173 haystack = seq_get(args, 0) 4174 needle = seq_get(args, 1) 4175 else: 4176 needle = seq_get(args, 0) 4177 haystack = seq_get(args, 1) 4178 4179 return self.expression( 4180 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4181 ) 4182 4183 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4184 args = self._parse_csv(self._parse_table) 4185 return exp.JoinHint(this=func_name.upper(), expressions=args) 4186 4187 def _parse_substring(self) -> exp.Substring: 4188 # Postgres supports the form: substring(string [from int] [for int]) 4189 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4190 4191 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4192 4193 if self._match(TokenType.FROM): 4194 args.append(self._parse_bitwise()) 4195 if self._match(TokenType.FOR): 
    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this
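
    # Illustrative sketch (not part of the original source): both the ANSI
    # TRIM(BOTH ... FROM ...) form and the comma-separated form land in the same
    # exp.Trim node via _parse_trim above, with the trim type recorded in `position`:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT TRIM(BOTH 'x' FROM y)").find(exp.Trim).args["position"]
    #   'BOTH'
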
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this
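
    # Illustrative sketch (not part of the original source): FILTER, WITHIN GROUP and
    # frame specifications all funnel through _parse_window above:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sql = """
    #   ...     SELECT SUM(x) FILTER(WHERE x > 0) OVER (
    #   ...         PARTITION BY y ORDER BY z ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    #   ...     ) FROM t
    #   ... """
    #   >>> window = sqlglot.parse_one(sql).find(exp.Window)
    #   >>> window.args["spec"].args["kind"]
    #   'ROWS'
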
    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()
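
    # Illustrative sketch (not part of the original source): _parse_placeholder above
    # is the shared fallback of the literal parsers, which is what lets named bind
    # parameters appear anywhere a literal could:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT * FROM t WHERE id = :id")[0]
    #   'SELECT * FROM t WHERE id = :id'
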
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
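
    # Illustrative sketch (not part of the original source): _parse_tokens earlier
    # above is the generic left-associative binary-operator loop, so chained operators
    # nest to the left. E.g. for the CONJUNCTION table:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("a AND b AND c").sql()
    #   'a AND b AND c'
    #   >>> type(sqlglot.parse_one("a AND b AND c").this).__name__  # left operand is itself an And
    #   'And'
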
    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
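
    # Illustrative sketch (not part of the original source): the ALTER_PARSERS
    # dispatch (the _parse_alter_table_* helpers above, driven by _parse_alter below)
    # yields an exp.AlterTable whose `actions` list holds the parsed sub-commands:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
    #   >>> isinstance(alter, exp.AlterTable), type(alter.args["actions"][0]).__name__
    #   (True, 'ColumnDef')
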
    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
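
    # Illustrative sketch (not part of the original source): each WHEN branch of a
    # MERGE statement becomes an exp.When appended to the Merge's expressions by
    # _parse_merge above:
    #
    #   >>> import sqlglot
    #   >>> sql = """
    #   ...     MERGE INTO t USING s ON t.id = s.id
    #   ...     WHEN MATCHED THEN UPDATE SET t.x = s.x
    #   ...     WHEN NOT MATCHED THEN INSERT (id, x) VALUES (s.id, s.x)
    #   ... """
    #   >>> merge = sqlglot.parse_one(sql)
    #   >>> [w.args["matched"] for w in merge.expressions]
    #   [True, False]
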
    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None
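
    # Illustrative sketch (not part of the original source): _find_parser above walks
    # SHOW_TRIE / SET_TRIE token by token, which is how multi-word SHOW and SET
    # commands find their dedicated sub-parsers in dialects that define them:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> isinstance(sqlglot.parse_one("SHOW TABLES", read="mysql"), exp.Show)
    #   True
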
    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
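
# Illustrative sketch (not part of the original source): end to end, the Parser is
# driven with a token list produced by the Tokenizer, which is also what the
# higher-level sqlglot.parse_one convenience wraps:
#
#   >>> from sqlglot import Parser, Tokenizer
#   >>> tokens = Tokenizer().tokenize("SELECT 1 + 1 AS two")
#   >>> expressions = Parser().parse(tokens)
#   >>> expressions[0].sql()
#   'SELECT 1 + 1 AS two'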
self._parse_property_assignment(exp.LocationProperty), 638 "LOCK": lambda self: self._parse_locking(), 639 "LOCKING": lambda self: self._parse_locking(), 640 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 641 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 642 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 643 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 644 "NO": lambda self: self._parse_no_property(), 645 "ON": lambda self: self._parse_on_property(), 646 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 647 "PARTITION BY": lambda self: self._parse_partitioned_by(), 648 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 649 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 650 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 651 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 652 "RETURNS": lambda self: self._parse_returns(), 653 "ROW": lambda self: self._parse_row(), 654 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 655 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 656 "SETTINGS": lambda self: self.expression( 657 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 658 ), 659 "SORTKEY": lambda self: self._parse_sortkey(), 660 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 661 "STABLE": lambda self: self.expression( 662 exp.StabilityProperty, this=exp.Literal.string("STABLE") 663 ), 664 "STORED": lambda self: self._parse_stored(), 665 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 666 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 667 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 668 "TO": lambda self: self._parse_to_table(), 669 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 670 "TTL": lambda self: self._parse_ttl(), 671 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 672 "VOLATILE": lambda self: self._parse_volatile_property(), 673 "WITH": lambda self: self._parse_with_property(), 674 } 675 676 CONSTRAINT_PARSERS = { 677 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 678 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 679 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 680 "CHARACTER SET": lambda self: self.expression( 681 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 682 ), 683 "CHECK": lambda self: self.expression( 684 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 685 ), 686 "COLLATE": lambda self: self.expression( 687 exp.CollateColumnConstraint, this=self._parse_var() 688 ), 689 "COMMENT": lambda self: self.expression( 690 exp.CommentColumnConstraint, this=self._parse_string() 691 ), 692 "COMPRESS": lambda self: self._parse_compress(), 693 "CLUSTERED": lambda self: self.expression( 694 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 695 ), 696 "DEFAULT": lambda self: self.expression( 697 exp.DefaultColumnConstraint, this=self._parse_bitwise() 698 ), 699 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 700 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 701 "FORMAT": lambda self: self.expression( 702 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 703 ), 704 "GENERATED": lambda self: 
self._parse_generated_as_identity(), 705 "IDENTITY": lambda self: self._parse_auto_increment(), 706 "INLINE": lambda self: self._parse_inline(), 707 "LIKE": lambda self: self._parse_create_like(), 708 "NOT": lambda self: self._parse_not_constraint(), 709 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 710 "ON": lambda self: ( 711 self._match(TokenType.UPDATE) 712 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 713 ) 714 or self.expression(exp.OnProperty, this=self._parse_id_var()), 715 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 716 "PRIMARY KEY": lambda self: self._parse_primary_key(), 717 "REFERENCES": lambda self: self._parse_references(match=False), 718 "TITLE": lambda self: self.expression( 719 exp.TitleColumnConstraint, this=self._parse_var_or_string() 720 ), 721 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 722 "UNIQUE": lambda self: self._parse_unique(), 723 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 724 "WITH": lambda self: self.expression( 725 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 726 ), 727 } 728 729 ALTER_PARSERS = { 730 "ADD": lambda self: self._parse_alter_table_add(), 731 "ALTER": lambda self: self._parse_alter_table_alter(), 732 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 733 "DROP": lambda self: self._parse_alter_table_drop(), 734 "RENAME": lambda self: self._parse_alter_table_rename(), 735 } 736 737 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 738 739 NO_PAREN_FUNCTION_PARSERS = { 740 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 741 "CASE": lambda self: self._parse_case(), 742 "IF": lambda self: self._parse_if(), 743 "NEXT": lambda self: self._parse_next_value_for(), 744 } 745 746 INVALID_FUNC_NAME_TOKENS = { 747 TokenType.IDENTIFIER, 748 TokenType.STRING, 749 } 750 751 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 752 753 FUNCTION_PARSERS = { 754 "ANY_VALUE": lambda self: self._parse_any_value(), 755 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 756 "CONCAT": lambda self: self._parse_concat(), 757 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 758 "DECODE": lambda self: self._parse_decode(), 759 "EXTRACT": lambda self: self._parse_extract(), 760 "JSON_OBJECT": lambda self: self._parse_json_object(), 761 "LOG": lambda self: self._parse_logarithm(), 762 "MATCH": lambda self: self._parse_match_against(), 763 "OPENJSON": lambda self: self._parse_open_json(), 764 "POSITION": lambda self: self._parse_position(), 765 "SAFE_CAST": lambda self: self._parse_cast(False), 766 "STRING_AGG": lambda self: self._parse_string_agg(), 767 "SUBSTRING": lambda self: self._parse_substring(), 768 "TRIM": lambda self: self._parse_trim(), 769 "TRY_CAST": lambda self: self._parse_cast(False), 770 "TRY_CONVERT": lambda self: self._parse_convert(False), 771 } 772 773 QUERY_MODIFIER_PARSERS = { 774 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 775 TokenType.WHERE: lambda self: ("where", self._parse_where()), 776 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 777 TokenType.HAVING: lambda self: ("having", self._parse_having()), 778 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 779 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 780 TokenType.ORDER_BY: 
lambda self: ("order", self._parse_order()), 781 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 782 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 783 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 784 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 785 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 786 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 787 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 788 TokenType.CLUSTER_BY: lambda self: ( 789 "cluster", 790 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 791 ), 792 TokenType.DISTRIBUTE_BY: lambda self: ( 793 "distribute", 794 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 795 ), 796 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 797 } 798 799 SET_PARSERS = { 800 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 801 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 802 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 803 "TRANSACTION": lambda self: self._parse_set_transaction(), 804 } 805 806 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 807 808 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 809 810 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 811 812 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 813 814 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 815 816 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 817 TRANSACTION_CHARACTERISTICS = { 818 "ISOLATION LEVEL REPEATABLE READ", 819 "ISOLATION LEVEL READ COMMITTED", 820 "ISOLATION LEVEL READ UNCOMMITTED", 821 "ISOLATION LEVEL SERIALIZABLE", 822 "READ WRITE", 823 "READ ONLY", 824 } 825 826 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 827 828 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 829 830 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 831 832 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 833 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 834 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 835 836 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 837 838 DISTINCT_TOKENS = {TokenType.DISTINCT} 839 840 STRICT_CAST = True 841 842 # A NULL arg in CONCAT yields NULL by default 843 CONCAT_NULL_OUTPUTS_STRING = False 844 845 PREFIXED_PIVOT_COLUMNS = False 846 IDENTIFY_PIVOT_STRINGS = False 847 848 LOG_BASE_FIRST = True 849 LOG_DEFAULTS_TO_LN = False 850 851 SUPPORTS_USER_DEFINED_TYPES = True 852 853 __slots__ = ( 854 "error_level", 855 "error_message_context", 856 "max_errors", 857 "sql", 858 "errors", 859 "_tokens", 860 "_index", 861 "_curr", 862 "_next", 863 "_prev", 864 "_prev_comments", 865 "_tokenizer", 866 ) 867 868 # Autofilled 869 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 870 INDEX_OFFSET: int = 0 871 UNNEST_COLUMN_ONLY: bool = False 872 ALIAS_POST_TABLESAMPLE: bool = False 873 STRICT_STRING_CONCAT = False 874 NORMALIZE_FUNCTIONS = "upper" 875 NULL_ORDERING: str = "nulls_are_small" 876 SHOW_TRIE: t.Dict = {} 877 SET_TRIE: t.Dict = {} 878 FORMAT_MAPPING: t.Dict[str, str] = {} 879 FORMAT_TRIE: t.Dict = {} 880 TIME_MAPPING: t.Dict[str, str] = {} 881 TIME_TRIE: t.Dict = {} 882 883 def __init__( 884 self, 885 error_level: t.Optional[ErrorLevel] = None, 886 error_message_context: int = 100, 887 max_errors: int = 3, 888 ): 
889 self.error_level = error_level or ErrorLevel.IMMEDIATE 890 self.error_message_context = error_message_context 891 self.max_errors = max_errors 892 self._tokenizer = self.TOKENIZER_CLASS() 893 self.reset() 894 895 def reset(self): 896 self.sql = "" 897 self.errors = [] 898 self._tokens = [] 899 self._index = 0 900 self._curr = None 901 self._next = None 902 self._prev = None 903 self._prev_comments = None 904 905 def parse( 906 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 907 ) -> t.List[t.Optional[exp.Expression]]: 908 """ 909 Parses a list of tokens and returns a list of syntax trees, one tree 910 per parsed SQL statement. 911 912 Args: 913 raw_tokens: The list of tokens. 914 sql: The original SQL string, used to produce helpful debug messages. 915 916 Returns: 917 The list of produced syntax trees. 918 """ 919 return self._parse( 920 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 921 ) 922 923 def parse_into( 924 self, 925 expression_types: exp.IntoType, 926 raw_tokens: t.List[Token], 927 sql: t.Optional[str] = None, 928 ) -> t.List[t.Optional[exp.Expression]]: 929 """ 930 Parses a list of tokens into a given Expression type. If a collection of Expression 931 types is given instead, this method will try to parse the token list into each one 932 of them, stopping at the first for which the parsing succeeds. 933 934 Args: 935 expression_types: The expression type(s) to try to parse the token list into. 936 raw_tokens: The list of tokens. 937 sql: The original SQL string, used to produce helpful debug messages. 938 939 Returns: 940 The target Expression. 941 """ 942 errors = [] 943 for expression_type in ensure_list(expression_types): 944 parser = self.EXPRESSION_PARSERS.get(expression_type) 945 if not parser: 946 raise TypeError(f"No parser registered for {expression_type}") 947 948 try: 949 return self._parse(parser, raw_tokens, sql) 950 except ParseError as e: 951 e.errors[0]["into_expression"] = expression_type 952 errors.append(e) 953 954 raise ParseError( 955 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 956 errors=merge_errors(errors), 957 ) from errors[-1] 958 959 def _parse( 960 self, 961 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 962 raw_tokens: t.List[Token], 963 sql: t.Optional[str] = None, 964 ) -> t.List[t.Optional[exp.Expression]]: 965 self.reset() 966 self.sql = sql or "" 967 968 total = len(raw_tokens) 969 chunks: t.List[t.List[Token]] = [[]] 970 971 for i, token in enumerate(raw_tokens): 972 if token.token_type == TokenType.SEMICOLON: 973 if i < total - 1: 974 chunks.append([]) 975 else: 976 chunks[-1].append(token) 977 978 expressions = [] 979 980 for tokens in chunks: 981 self._index = -1 982 self._tokens = tokens 983 self._advance() 984 985 expressions.append(parse_method(self)) 986 987 if self._index < len(self._tokens): 988 self.raise_error("Invalid expression / Unexpected token") 989 990 self.check_errors() 991 992 return expressions 993 994 def check_errors(self) -> None: 995 """Logs or raises any found errors, depending on the chosen error level setting.""" 996 if self.error_level == ErrorLevel.WARN: 997 for error in self.errors: 998 logger.error(str(error)) 999 elif self.error_level == ErrorLevel.RAISE and self.errors: 1000 raise ParseError( 1001 concat_messages(self.errors, self.max_errors), 1002 errors=merge_errors(self.errors), 1003 ) 1004 1005 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1006 """ 1007 Appends an error to the list of
recorded errors or raises it, depending on the chosen 1008 error level setting. 1009 """ 1010 token = token or self._curr or self._prev or Token.string("") 1011 start = token.start 1012 end = token.end + 1 1013 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1014 highlight = self.sql[start:end] 1015 end_context = self.sql[end : end + self.error_message_context] 1016 1017 error = ParseError.new( 1018 f"{message}. Line {token.line}, Col: {token.col}.\n" 1019 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1020 description=message, 1021 line=token.line, 1022 col=token.col, 1023 start_context=start_context, 1024 highlight=highlight, 1025 end_context=end_context, 1026 ) 1027 1028 if self.error_level == ErrorLevel.IMMEDIATE: 1029 raise error 1030 1031 self.errors.append(error) 1032 1033 def expression( 1034 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1035 ) -> E: 1036 """ 1037 Creates a new, validated Expression. 1038 1039 Args: 1040 exp_class: The expression class to instantiate. 1041 comments: An optional list of comments to attach to the expression. 1042 kwargs: The arguments to set for the expression along with their respective values. 1043 1044 Returns: 1045 The target expression. 1046 """ 1047 instance = exp_class(**kwargs) 1048 instance.add_comments(comments) if comments else self._add_comments(instance) 1049 return self.validate_expression(instance) 1050 1051 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1052 if expression and self._prev_comments: 1053 expression.add_comments(self._prev_comments) 1054 self._prev_comments = None 1055 1056 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1057 """ 1058 Validates an Expression, making sure that all its mandatory arguments are set. 1059 1060 Args: 1061 expression: The expression to validate. 1062 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1063 1064 Returns: 1065 The validated expression. 
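
        Example:
            If the expression is a Func that is missing a mandatory argument, each
            message produced by its error_messages() is routed through raise_error,
            so whether this raises immediately or merely records the error depends
            on the configured error_level.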
1066 """ 1067 if self.error_level != ErrorLevel.IGNORE: 1068 for error_message in expression.error_messages(args): 1069 self.raise_error(error_message) 1070 1071 return expression 1072 1073 def _find_sql(self, start: Token, end: Token) -> str: 1074 return self.sql[start.start : end.end + 1] 1075 1076 def _advance(self, times: int = 1) -> None: 1077 self._index += times 1078 self._curr = seq_get(self._tokens, self._index) 1079 self._next = seq_get(self._tokens, self._index + 1) 1080 1081 if self._index > 0: 1082 self._prev = self._tokens[self._index - 1] 1083 self._prev_comments = self._prev.comments 1084 else: 1085 self._prev = None 1086 self._prev_comments = None 1087 1088 def _retreat(self, index: int) -> None: 1089 if index != self._index: 1090 self._advance(index - self._index) 1091 1092 def _parse_command(self) -> exp.Command: 1093 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1094 1095 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1096 start = self._prev 1097 exists = self._parse_exists() if allow_exists else None 1098 1099 self._match(TokenType.ON) 1100 1101 kind = self._match_set(self.CREATABLES) and self._prev 1102 if not kind: 1103 return self._parse_as_command(start) 1104 1105 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1106 this = self._parse_user_defined_function(kind=kind.token_type) 1107 elif kind.token_type == TokenType.TABLE: 1108 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1109 elif kind.token_type == TokenType.COLUMN: 1110 this = self._parse_column() 1111 else: 1112 this = self._parse_id_var() 1113 1114 self._match(TokenType.IS) 1115 1116 return self.expression( 1117 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1118 ) 1119 1120 def _parse_to_table( 1121 self, 1122 ) -> exp.ToTableProperty: 1123 table = self._parse_table_parts(schema=True) 1124 return self.expression(exp.ToTableProperty, this=table) 1125 1126 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1127 def _parse_ttl(self) -> exp.Expression: 1128 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1129 this = self._parse_bitwise() 1130 1131 if self._match_text_seq("DELETE"): 1132 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1133 if self._match_text_seq("RECOMPRESS"): 1134 return self.expression( 1135 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1136 ) 1137 if self._match_text_seq("TO", "DISK"): 1138 return self.expression( 1139 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1140 ) 1141 if self._match_text_seq("TO", "VOLUME"): 1142 return self.expression( 1143 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1144 ) 1145 1146 return this 1147 1148 expressions = self._parse_csv(_parse_ttl_action) 1149 where = self._parse_where() 1150 group = self._parse_group() 1151 1152 aggregates = None 1153 if group and self._match(TokenType.SET): 1154 aggregates = self._parse_csv(self._parse_set_item) 1155 1156 return self.expression( 1157 exp.MergeTreeTTL, 1158 expressions=expressions, 1159 where=where, 1160 group=group, 1161 aggregates=aggregates, 1162 ) 1163 1164 def _parse_statement(self) -> t.Optional[exp.Expression]: 1165 if self._curr is None: 1166 return None 1167 1168 if self._match_set(self.STATEMENT_PARSERS): 1169 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1170 1171 if self._match_set(Tokenizer.COMMANDS): 
1172 return self._parse_command() 1173 1174 expression = self._parse_expression() 1175 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1176 return self._parse_query_modifiers(expression) 1177 1178 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1179 start = self._prev 1180 temporary = self._match(TokenType.TEMPORARY) 1181 materialized = self._match_text_seq("MATERIALIZED") 1182 1183 kind = self._match_set(self.CREATABLES) and self._prev.text 1184 if not kind: 1185 return self._parse_as_command(start) 1186 1187 return self.expression( 1188 exp.Drop, 1189 comments=start.comments, 1190 exists=exists or self._parse_exists(), 1191 this=self._parse_table(schema=True), 1192 kind=kind, 1193 temporary=temporary, 1194 materialized=materialized, 1195 cascade=self._match_text_seq("CASCADE"), 1196 constraints=self._match_text_seq("CONSTRAINTS"), 1197 purge=self._match_text_seq("PURGE"), 1198 ) 1199 1200 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1201 return ( 1202 self._match_text_seq("IF") 1203 and (not not_ or self._match(TokenType.NOT)) 1204 and self._match(TokenType.EXISTS) 1205 ) 1206 1207 def _parse_create(self) -> exp.Create | exp.Command: 1208 # Note: this can't be None because we've matched a statement parser 1209 start = self._prev 1210 comments = self._prev_comments 1211 1212 replace = start.text.upper() == "REPLACE" or self._match_pair( 1213 TokenType.OR, TokenType.REPLACE 1214 ) 1215 unique = self._match(TokenType.UNIQUE) 1216 1217 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1218 self._advance() 1219 1220 properties = None 1221 create_token = self._match_set(self.CREATABLES) and self._prev 1222 1223 if not create_token: 1224 # exp.Properties.Location.POST_CREATE 1225 properties = self._parse_properties() 1226 create_token = self._match_set(self.CREATABLES) and self._prev 1227 1228 if not properties or not create_token: 1229 return self._parse_as_command(start) 1230 1231 exists = self._parse_exists(not_=True) 1232 this = None 1233 expression: t.Optional[exp.Expression] = None 1234 indexes = None 1235 no_schema_binding = None 1236 begin = None 1237 clone = None 1238 1239 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1240 nonlocal properties 1241 if properties and temp_props: 1242 properties.expressions.extend(temp_props.expressions) 1243 elif temp_props: 1244 properties = temp_props 1245 1246 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1247 this = self._parse_user_defined_function(kind=create_token.token_type) 1248 1249 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1250 extend_props(self._parse_properties()) 1251 1252 self._match(TokenType.ALIAS) 1253 1254 if self._match(TokenType.COMMAND): 1255 expression = self._parse_as_command(self._prev) 1256 else: 1257 begin = self._match(TokenType.BEGIN) 1258 return_ = self._match_text_seq("RETURN") 1259 expression = self._parse_statement() 1260 1261 if return_: 1262 expression = self.expression(exp.Return, this=expression) 1263 elif create_token.token_type == TokenType.INDEX: 1264 this = self._parse_index(index=self._parse_id_var()) 1265 elif create_token.token_type in self.DB_CREATABLES: 1266 table_parts = self._parse_table_parts(schema=True) 1267 1268 # exp.Properties.Location.POST_NAME 1269 self._match(TokenType.COMMA) 1270 extend_props(self._parse_properties(before=True)) 1271 1272 this = self._parse_schema(this=table_parts) 1273 1274 # 
exp.Properties.Location.POST_SCHEMA and POST_WITH 1275 extend_props(self._parse_properties()) 1276 1277 self._match(TokenType.ALIAS) 1278 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1279 # exp.Properties.Location.POST_ALIAS 1280 extend_props(self._parse_properties()) 1281 1282 expression = self._parse_ddl_select() 1283 1284 if create_token.token_type == TokenType.TABLE: 1285 # exp.Properties.Location.POST_EXPRESSION 1286 extend_props(self._parse_properties()) 1287 1288 indexes = [] 1289 while True: 1290 index = self._parse_index() 1291 1292 # exp.Properties.Location.POST_INDEX 1293 extend_props(self._parse_properties()) 1294 1295 if not index: 1296 break 1297 else: 1298 self._match(TokenType.COMMA) 1299 indexes.append(index) 1300 elif create_token.token_type == TokenType.VIEW: 1301 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1302 no_schema_binding = True 1303 1304 if self._match_text_seq("CLONE"): 1305 clone = self._parse_table(schema=True) 1306 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1307 clone_kind = ( 1308 self._match(TokenType.L_PAREN) 1309 and self._match_texts(self.CLONE_KINDS) 1310 and self._prev.text.upper() 1311 ) 1312 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1313 self._match(TokenType.R_PAREN) 1314 clone = self.expression( 1315 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1316 ) 1317 1318 return self.expression( 1319 exp.Create, 1320 comments=comments, 1321 this=this, 1322 kind=create_token.text, 1323 replace=replace, 1324 unique=unique, 1325 expression=expression, 1326 exists=exists, 1327 properties=properties, 1328 indexes=indexes, 1329 no_schema_binding=no_schema_binding, 1330 begin=begin, 1331 clone=clone, 1332 ) 1333 1334 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1335 # only used for teradata currently 1336 self._match(TokenType.COMMA) 1337 1338 kwargs = { 1339 "no": self._match_text_seq("NO"), 1340 "dual": self._match_text_seq("DUAL"), 1341 "before": self._match_text_seq("BEFORE"), 1342 "default": self._match_text_seq("DEFAULT"), 1343 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1344 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1345 "after": self._match_text_seq("AFTER"), 1346 "minimum": self._match_texts(("MIN", "MINIMUM")), 1347 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1348 } 1349 1350 if self._match_texts(self.PROPERTY_PARSERS): 1351 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1352 try: 1353 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1354 except TypeError: 1355 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1356 1357 return None 1358 1359 def _parse_property(self) -> t.Optional[exp.Expression]: 1360 if self._match_texts(self.PROPERTY_PARSERS): 1361 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1362 1363 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1364 return self._parse_character_set(default=True) 1365 1366 if self._match_text_seq("COMPOUND", "SORTKEY"): 1367 return self._parse_sortkey(compound=True) 1368 1369 if self._match_text_seq("SQL", "SECURITY"): 1370 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1371 1372 assignment = self._match_pair( 1373 TokenType.VAR, TokenType.EQ, advance=False 1374 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1375 1376 if assignment: 1377 key = self._parse_var_or_string() 1378 self._match(TokenType.EQ) 1379 
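        # At this point a generic `key = value` property (a VAR or string key
        # followed by "=") has its key parsed and the "=" consumed; the value may
        # be a column reference or any bare variable.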
return self.expression( 1380 exp.Property, 1381 this=key, 1382 value=self._parse_column() or self._parse_var(any_token=True), 1383 ) 1384 1385 return None 1386 1387 def _parse_stored(self) -> exp.FileFormatProperty: 1388 self._match(TokenType.ALIAS) 1389 1390 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1391 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1392 1393 return self.expression( 1394 exp.FileFormatProperty, 1395 this=self.expression( 1396 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1397 ) 1398 if input_format or output_format 1399 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1400 ) 1401 1402 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1403 self._match(TokenType.EQ) 1404 self._match(TokenType.ALIAS) 1405 return self.expression(exp_class, this=self._parse_field()) 1406 1407 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1408 properties = [] 1409 while True: 1410 if before: 1411 prop = self._parse_property_before() 1412 else: 1413 prop = self._parse_property() 1414 1415 if not prop: 1416 break 1417 for p in ensure_list(prop): 1418 properties.append(p) 1419 1420 if properties: 1421 return self.expression(exp.Properties, expressions=properties) 1422 1423 return None 1424 1425 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1426 return self.expression( 1427 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1428 ) 1429 1430 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1431 if self._index >= 2: 1432 pre_volatile_token = self._tokens[self._index - 2] 1433 else: 1434 pre_volatile_token = None 1435 1436 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1437 return exp.VolatileProperty() 1438 1439 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1440 1441 def _parse_with_property( 1442 self, 1443 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1444 if self._match(TokenType.L_PAREN, advance=False): 1445 return self._parse_wrapped_csv(self._parse_property) 1446 1447 if self._match_text_seq("JOURNAL"): 1448 return self._parse_withjournaltable() 1449 1450 if self._match_text_seq("DATA"): 1451 return self._parse_withdata(no=False) 1452 elif self._match_text_seq("NO", "DATA"): 1453 return self._parse_withdata(no=True) 1454 1455 if not self._next: 1456 return None 1457 1458 return self._parse_withisolatedloading() 1459 1460 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1461 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1462 self._match(TokenType.EQ) 1463 1464 user = self._parse_id_var() 1465 self._match(TokenType.PARAMETER) 1466 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1467 1468 if not user or not host: 1469 return None 1470 1471 return exp.DefinerProperty(this=f"{user}@{host}") 1472 1473 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1474 self._match(TokenType.TABLE) 1475 self._match(TokenType.EQ) 1476 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1477 1478 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1479 return self.expression(exp.LogProperty, no=no) 1480 1481 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1482 return self.expression(exp.JournalProperty, **kwargs) 1483 1484 
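    # The property sub-parsers in this stretch all follow the PROPERTY_PARSERS
    # contract: consume the tokens after the property keyword and return an
    # exp.Expression (or None). A dialect can therefore register extra properties
    # declaratively; a minimal, hypothetical sketch (FOO is not a real property
    # in this source):
    #
    #     class MyDialectParser(Parser):
    #         PROPERTY_PARSERS = {
    #             **Parser.PROPERTY_PARSERS,
    #             "FOO": lambda self: self.expression(
    #                 exp.Property, this=exp.var("FOO"), value=self._parse_field()
    #             ),
    #         }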
def _parse_checksum(self) -> exp.ChecksumProperty: 1485 self._match(TokenType.EQ) 1486 1487 on = None 1488 if self._match(TokenType.ON): 1489 on = True 1490 elif self._match_text_seq("OFF"): 1491 on = False 1492 1493 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1494 1495 def _parse_cluster(self) -> exp.Cluster: 1496 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1497 1498 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1499 self._match_text_seq("BY") 1500 1501 self._match_l_paren() 1502 expressions = self._parse_csv(self._parse_column) 1503 self._match_r_paren() 1504 1505 if self._match_text_seq("SORTED", "BY"): 1506 self._match_l_paren() 1507 sorted_by = self._parse_csv(self._parse_ordered) 1508 self._match_r_paren() 1509 else: 1510 sorted_by = None 1511 1512 self._match(TokenType.INTO) 1513 buckets = self._parse_number() 1514 self._match_text_seq("BUCKETS") 1515 1516 return self.expression( 1517 exp.ClusteredByProperty, 1518 expressions=expressions, 1519 sorted_by=sorted_by, 1520 buckets=buckets, 1521 ) 1522 1523 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1524 if not self._match_text_seq("GRANTS"): 1525 self._retreat(self._index - 1) 1526 return None 1527 1528 return self.expression(exp.CopyGrantsProperty) 1529 1530 def _parse_freespace(self) -> exp.FreespaceProperty: 1531 self._match(TokenType.EQ) 1532 return self.expression( 1533 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1534 ) 1535 1536 def _parse_mergeblockratio( 1537 self, no: bool = False, default: bool = False 1538 ) -> exp.MergeBlockRatioProperty: 1539 if self._match(TokenType.EQ): 1540 return self.expression( 1541 exp.MergeBlockRatioProperty, 1542 this=self._parse_number(), 1543 percent=self._match(TokenType.PERCENT), 1544 ) 1545 1546 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1547 1548 def _parse_datablocksize( 1549 self, 1550 default: t.Optional[bool] = None, 1551 minimum: t.Optional[bool] = None, 1552 maximum: t.Optional[bool] = None, 1553 ) -> exp.DataBlocksizeProperty: 1554 self._match(TokenType.EQ) 1555 size = self._parse_number() 1556 1557 units = None 1558 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1559 units = self._prev.text 1560 1561 return self.expression( 1562 exp.DataBlocksizeProperty, 1563 size=size, 1564 units=units, 1565 default=default, 1566 minimum=minimum, 1567 maximum=maximum, 1568 ) 1569 1570 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1571 self._match(TokenType.EQ) 1572 always = self._match_text_seq("ALWAYS") 1573 manual = self._match_text_seq("MANUAL") 1574 never = self._match_text_seq("NEVER") 1575 default = self._match_text_seq("DEFAULT") 1576 1577 autotemp = None 1578 if self._match_text_seq("AUTOTEMP"): 1579 autotemp = self._parse_schema() 1580 1581 return self.expression( 1582 exp.BlockCompressionProperty, 1583 always=always, 1584 manual=manual, 1585 never=never, 1586 default=default, 1587 autotemp=autotemp, 1588 ) 1589 1590 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1591 no = self._match_text_seq("NO") 1592 concurrent = self._match_text_seq("CONCURRENT") 1593 self._match_text_seq("ISOLATED", "LOADING") 1594 for_all = self._match_text_seq("FOR", "ALL") 1595 for_insert = self._match_text_seq("FOR", "INSERT") 1596 for_none = self._match_text_seq("FOR", "NONE") 1597 return self.expression( 1598 exp.IsolatedLoadingProperty, 1599 no=no, 1600 
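            # Captures Teradata's WITH [NO] [CONCURRENT] ISOLATED LOADING
            # [FOR ALL | FOR INSERT | FOR NONE] options as individual flags.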
concurrent=concurrent, 1601 for_all=for_all, 1602 for_insert=for_insert, 1603 for_none=for_none, 1604 ) 1605 1606 def _parse_locking(self) -> exp.LockingProperty: 1607 if self._match(TokenType.TABLE): 1608 kind = "TABLE" 1609 elif self._match(TokenType.VIEW): 1610 kind = "VIEW" 1611 elif self._match(TokenType.ROW): 1612 kind = "ROW" 1613 elif self._match_text_seq("DATABASE"): 1614 kind = "DATABASE" 1615 else: 1616 kind = None 1617 1618 if kind in ("DATABASE", "TABLE", "VIEW"): 1619 this = self._parse_table_parts() 1620 else: 1621 this = None 1622 1623 if self._match(TokenType.FOR): 1624 for_or_in = "FOR" 1625 elif self._match(TokenType.IN): 1626 for_or_in = "IN" 1627 else: 1628 for_or_in = None 1629 1630 if self._match_text_seq("ACCESS"): 1631 lock_type = "ACCESS" 1632 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1633 lock_type = "EXCLUSIVE" 1634 elif self._match_text_seq("SHARE"): 1635 lock_type = "SHARE" 1636 elif self._match_text_seq("READ"): 1637 lock_type = "READ" 1638 elif self._match_text_seq("WRITE"): 1639 lock_type = "WRITE" 1640 elif self._match_text_seq("CHECKSUM"): 1641 lock_type = "CHECKSUM" 1642 else: 1643 lock_type = None 1644 1645 override = self._match_text_seq("OVERRIDE") 1646 1647 return self.expression( 1648 exp.LockingProperty, 1649 this=this, 1650 kind=kind, 1651 for_or_in=for_or_in, 1652 lock_type=lock_type, 1653 override=override, 1654 ) 1655 1656 def _parse_partition_by(self) -> t.List[exp.Expression]: 1657 if self._match(TokenType.PARTITION_BY): 1658 return self._parse_csv(self._parse_conjunction) 1659 return [] 1660 1661 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1662 self._match(TokenType.EQ) 1663 return self.expression( 1664 exp.PartitionedByProperty, 1665 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1666 ) 1667 1668 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1669 if self._match_text_seq("AND", "STATISTICS"): 1670 statistics = True 1671 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1672 statistics = False 1673 else: 1674 statistics = None 1675 1676 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1677 1678 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1679 if self._match_text_seq("PRIMARY", "INDEX"): 1680 return exp.NoPrimaryIndexProperty() 1681 return None 1682 1683 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1684 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1685 return exp.OnCommitProperty() 1686 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1687 return exp.OnCommitProperty(delete=True) 1688 return self.expression(exp.OnProperty, this=self._parse_id_var()) 1689 1690 def _parse_distkey(self) -> exp.DistKeyProperty: 1691 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1692 1693 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1694 table = self._parse_table(schema=True) 1695 1696 options = [] 1697 while self._match_texts(("INCLUDING", "EXCLUDING")): 1698 this = self._prev.text.upper() 1699 1700 id_var = self._parse_id_var() 1701 if not id_var: 1702 return None 1703 1704 options.append( 1705 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1706 ) 1707 1708 return self.expression(exp.LikeProperty, this=table, expressions=options) 1709 1710 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1711 return self.expression( 1712 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 
1713 ) 1714 1715 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1716 self._match(TokenType.EQ) 1717 return self.expression( 1718 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1719 ) 1720 1721 def _parse_returns(self) -> exp.ReturnsProperty: 1722 value: t.Optional[exp.Expression] 1723 is_table = self._match(TokenType.TABLE) 1724 1725 if is_table: 1726 if self._match(TokenType.LT): 1727 value = self.expression( 1728 exp.Schema, 1729 this="TABLE", 1730 expressions=self._parse_csv(self._parse_struct_types), 1731 ) 1732 if not self._match(TokenType.GT): 1733 self.raise_error("Expecting >") 1734 else: 1735 value = self._parse_schema(exp.var("TABLE")) 1736 else: 1737 value = self._parse_types() 1738 1739 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1740 1741 def _parse_describe(self) -> exp.Describe: 1742 kind = self._match_set(self.CREATABLES) and self._prev.text 1743 this = self._parse_table() 1744 return self.expression(exp.Describe, this=this, kind=kind) 1745 1746 def _parse_insert(self) -> exp.Insert: 1747 comments = ensure_list(self._prev_comments) 1748 overwrite = self._match(TokenType.OVERWRITE) 1749 ignore = self._match(TokenType.IGNORE) 1750 local = self._match_text_seq("LOCAL") 1751 alternative = None 1752 1753 if self._match_text_seq("DIRECTORY"): 1754 this: t.Optional[exp.Expression] = self.expression( 1755 exp.Directory, 1756 this=self._parse_var_or_string(), 1757 local=local, 1758 row_format=self._parse_row_format(match_row=True), 1759 ) 1760 else: 1761 if self._match(TokenType.OR): 1762 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1763 1764 self._match(TokenType.INTO) 1765 comments += ensure_list(self._prev_comments) 1766 self._match(TokenType.TABLE) 1767 this = self._parse_table(schema=True) 1768 1769 returning = self._parse_returning() 1770 1771 return self.expression( 1772 exp.Insert, 1773 comments=comments, 1774 this=this, 1775 exists=self._parse_exists(), 1776 partition=self._parse_partition(), 1777 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1778 and self._parse_conjunction(), 1779 expression=self._parse_ddl_select(), 1780 conflict=self._parse_on_conflict(), 1781 returning=returning or self._parse_returning(), 1782 overwrite=overwrite, 1783 alternative=alternative, 1784 ignore=ignore, 1785 ) 1786 1787 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1788 conflict = self._match_text_seq("ON", "CONFLICT") 1789 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1790 1791 if not conflict and not duplicate: 1792 return None 1793 1794 nothing = None 1795 expressions = None 1796 key = None 1797 constraint = None 1798 1799 if conflict: 1800 if self._match_text_seq("ON", "CONSTRAINT"): 1801 constraint = self._parse_id_var() 1802 else: 1803 key = self._parse_csv(self._parse_value) 1804 1805 self._match_text_seq("DO") 1806 if self._match_text_seq("NOTHING"): 1807 nothing = True 1808 else: 1809 self._match(TokenType.UPDATE) 1810 self._match(TokenType.SET) 1811 expressions = self._parse_csv(self._parse_equality) 1812 1813 return self.expression( 1814 exp.OnConflict, 1815 duplicate=duplicate, 1816 expressions=expressions, 1817 nothing=nothing, 1818 key=key, 1819 constraint=constraint, 1820 ) 1821 1822 def _parse_returning(self) -> t.Optional[exp.Returning]: 1823 if not self._match(TokenType.RETURNING): 1824 return None 1825 return self.expression( 1826 exp.Returning, 1827 expressions=self._parse_csv(self._parse_expression), 1828 
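            # The optional INTO below covers RETURNING ... INTO <variable>
            # (e.g. Oracle); a plain PostgreSQL-style RETURNING fills only the
            # expressions list.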
into=self._match(TokenType.INTO) and self._parse_table_part(), 1829 ) 1830 1831 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1832 if not self._match(TokenType.FORMAT): 1833 return None 1834 return self._parse_row_format() 1835 1836 def _parse_row_format( 1837 self, match_row: bool = False 1838 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1839 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1840 return None 1841 1842 if self._match_text_seq("SERDE"): 1843 this = self._parse_string() 1844 1845 serde_properties = None 1846 if self._match(TokenType.SERDE_PROPERTIES): 1847 serde_properties = self.expression( 1848 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1849 ) 1850 1851 return self.expression( 1852 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1853 ) 1854 1855 self._match_text_seq("DELIMITED") 1856 1857 kwargs = {} 1858 1859 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1860 kwargs["fields"] = self._parse_string() 1861 if self._match_text_seq("ESCAPED", "BY"): 1862 kwargs["escaped"] = self._parse_string() 1863 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1864 kwargs["collection_items"] = self._parse_string() 1865 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1866 kwargs["map_keys"] = self._parse_string() 1867 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1868 kwargs["lines"] = self._parse_string() 1869 if self._match_text_seq("NULL", "DEFINED", "AS"): 1870 kwargs["null"] = self._parse_string() 1871 1872 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1873 1874 def _parse_load(self) -> exp.LoadData | exp.Command: 1875 if self._match_text_seq("DATA"): 1876 local = self._match_text_seq("LOCAL") 1877 self._match_text_seq("INPATH") 1878 inpath = self._parse_string() 1879 overwrite = self._match(TokenType.OVERWRITE) 1880 self._match_pair(TokenType.INTO, TokenType.TABLE) 1881 1882 return self.expression( 1883 exp.LoadData, 1884 this=self._parse_table(schema=True), 1885 local=local, 1886 overwrite=overwrite, 1887 inpath=inpath, 1888 partition=self._parse_partition(), 1889 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1890 serde=self._match_text_seq("SERDE") and self._parse_string(), 1891 ) 1892 return self._parse_as_command(self._prev) 1893 1894 def _parse_delete(self) -> exp.Delete: 1895 # This handles MySQL's "Multiple-Table Syntax" 1896 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1897 tables = None 1898 comments = self._prev_comments 1899 if not self._match(TokenType.FROM, advance=False): 1900 tables = self._parse_csv(self._parse_table) or None 1901 1902 returning = self._parse_returning() 1903 1904 return self.expression( 1905 exp.Delete, 1906 comments=comments, 1907 tables=tables, 1908 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1909 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1910 where=self._parse_where(), 1911 returning=returning or self._parse_returning(), 1912 limit=self._parse_limit(), 1913 ) 1914 1915 def _parse_update(self) -> exp.Update: 1916 comments = self._prev_comments 1917 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1918 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1919 returning = self._parse_returning() 1920 return self.expression( 1921 exp.Update, 1922 comments=comments, 1923 **{ # 
type: ignore 1924 "this": this, 1925 "expressions": expressions, 1926 "from": self._parse_from(joins=True), 1927 "where": self._parse_where(), 1928 "returning": returning or self._parse_returning(), 1929 "limit": self._parse_limit(), 1930 }, 1931 ) 1932 1933 def _parse_uncache(self) -> exp.Uncache: 1934 if not self._match(TokenType.TABLE): 1935 self.raise_error("Expecting TABLE after UNCACHE") 1936 1937 return self.expression( 1938 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1939 ) 1940 1941 def _parse_cache(self) -> exp.Cache: 1942 lazy = self._match_text_seq("LAZY") 1943 self._match(TokenType.TABLE) 1944 table = self._parse_table(schema=True) 1945 1946 options = [] 1947 if self._match_text_seq("OPTIONS"): 1948 self._match_l_paren() 1949 k = self._parse_string() 1950 self._match(TokenType.EQ) 1951 v = self._parse_string() 1952 options = [k, v] 1953 self._match_r_paren() 1954 1955 self._match(TokenType.ALIAS) 1956 return self.expression( 1957 exp.Cache, 1958 this=table, 1959 lazy=lazy, 1960 options=options, 1961 expression=self._parse_select(nested=True), 1962 ) 1963 1964 def _parse_partition(self) -> t.Optional[exp.Partition]: 1965 if not self._match(TokenType.PARTITION): 1966 return None 1967 1968 return self.expression( 1969 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1970 ) 1971 1972 def _parse_value(self) -> exp.Tuple: 1973 if self._match(TokenType.L_PAREN): 1974 expressions = self._parse_csv(self._parse_conjunction) 1975 self._match_r_paren() 1976 return self.expression(exp.Tuple, expressions=expressions) 1977 1978 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1979 # https://prestodb.io/docs/current/sql/values.html 1980 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1981 1982 def _parse_projections(self) -> t.List[exp.Expression]: 1983 return self._parse_expressions() 1984 1985 def _parse_select( 1986 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1987 ) -> t.Optional[exp.Expression]: 1988 cte = self._parse_with() 1989 if cte: 1990 this = self._parse_statement() 1991 1992 if not this: 1993 self.raise_error("Failed to parse any statement following CTE") 1994 return cte 1995 1996 if "with" in this.arg_types: 1997 this.set("with", cte) 1998 else: 1999 self.raise_error(f"{this.key} does not support CTE") 2000 this = cte 2001 elif self._match(TokenType.SELECT): 2002 comments = self._prev_comments 2003 2004 hint = self._parse_hint() 2005 all_ = self._match(TokenType.ALL) 2006 distinct = self._match_set(self.DISTINCT_TOKENS) 2007 2008 kind = ( 2009 self._match(TokenType.ALIAS) 2010 and self._match_texts(("STRUCT", "VALUE")) 2011 and self._prev.text 2012 ) 2013 2014 if distinct: 2015 distinct = self.expression( 2016 exp.Distinct, 2017 on=self._parse_value() if self._match(TokenType.ON) else None, 2018 ) 2019 2020 if all_ and distinct: 2021 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2022 2023 limit = self._parse_limit(top=True) 2024 projections = self._parse_projections() 2025 2026 this = self.expression( 2027 exp.Select, 2028 kind=kind, 2029 hint=hint, 2030 distinct=distinct, 2031 expressions=projections, 2032 limit=limit, 2033 ) 2034 this.comments = comments 2035 2036 into = self._parse_into() 2037 if into: 2038 this.set("into", into) 2039 2040 from_ = self._parse_from() 2041 if from_: 2042 this.set("from", from_) 2043 2044 this = self._parse_query_modifiers(this) 2045 elif (table or nested) and 
self._match(TokenType.L_PAREN): 2046 if self._match(TokenType.PIVOT): 2047 this = self._parse_simplified_pivot() 2048 elif self._match(TokenType.FROM): 2049 this = exp.select("*").from_( 2050 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2051 ) 2052 else: 2053 this = self._parse_table() if table else self._parse_select(nested=True) 2054 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2055 2056 self._match_r_paren() 2057 2058 # We return early here so that the UNION isn't attached to the subquery by the 2059 # following call to _parse_set_operations, but instead becomes the parent node 2060 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2061 elif self._match(TokenType.VALUES): 2062 this = self.expression( 2063 exp.Values, 2064 expressions=self._parse_csv(self._parse_value), 2065 alias=self._parse_table_alias(), 2066 ) 2067 else: 2068 this = None 2069 2070 return self._parse_set_operations(this) 2071 2072 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2073 if not skip_with_token and not self._match(TokenType.WITH): 2074 return None 2075 2076 comments = self._prev_comments 2077 recursive = self._match(TokenType.RECURSIVE) 2078 2079 expressions = [] 2080 while True: 2081 expressions.append(self._parse_cte()) 2082 2083 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2084 break 2085 else: 2086 self._match(TokenType.WITH) 2087 2088 return self.expression( 2089 exp.With, comments=comments, expressions=expressions, recursive=recursive 2090 ) 2091 2092 def _parse_cte(self) -> exp.CTE: 2093 alias = self._parse_table_alias() 2094 if not alias or not alias.this: 2095 self.raise_error("Expected CTE to have alias") 2096 2097 self._match(TokenType.ALIAS) 2098 return self.expression( 2099 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2100 ) 2101 2102 def _parse_table_alias( 2103 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2104 ) -> t.Optional[exp.TableAlias]: 2105 any_token = self._match(TokenType.ALIAS) 2106 alias = ( 2107 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2108 or self._parse_string_as_identifier() 2109 ) 2110 2111 index = self._index 2112 if self._match(TokenType.L_PAREN): 2113 columns = self._parse_csv(self._parse_function_parameter) 2114 self._match_r_paren() if columns else self._retreat(index) 2115 else: 2116 columns = None 2117 2118 if not alias and not columns: 2119 return None 2120 2121 return self.expression(exp.TableAlias, this=alias, columns=columns) 2122 2123 def _parse_subquery( 2124 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2125 ) -> t.Optional[exp.Subquery]: 2126 if not this: 2127 return None 2128 2129 return self.expression( 2130 exp.Subquery, 2131 this=this, 2132 pivots=self._parse_pivots(), 2133 alias=self._parse_table_alias() if parse_alias else None, 2134 ) 2135 2136 def _parse_query_modifiers( 2137 self, this: t.Optional[exp.Expression] 2138 ) -> t.Optional[exp.Expression]: 2139 if isinstance(this, self.MODIFIABLES): 2140 for join in iter(self._parse_join, None): 2141 this.append("joins", join) 2142 for lateral in iter(self._parse_lateral, None): 2143 this.append("laterals", lateral) 2144 2145 while True: 2146 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2147 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2148 key, expression = parser(self) 2149 2150 if expression: 2151 this.set(key, expression) 2152 if key == "limit": 2153 
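                        # A Limit expression may carry an inline offset (e.g.
                        # MySQL's "LIMIT 2, 10"); it is detached here and stored
                        # as a standalone exp.Offset so both modifiers end up in
                        # their canonical args.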

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
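
    # Usage sketch (illustrative, not part of the parser source): the method
    # above yields an exp.MatchRecognize that is attached to the query as a
    # modifier. Assuming the public sqlglot.parse_one API:
    #
    #     >>> import sqlglot
    #     >>> sql = """
    #     ...     SELECT * FROM t MATCH_RECOGNIZE (
    #     ...         PARTITION BY a ORDER BY b
    #     ...         PATTERN (x+) DEFINE x AS x.b > 0
    #     ...     )
    #     ... """
    #     >>> sqlglot.parse_one(sql).find(sqlglot.exp.MatchRecognize) is not None
    #     True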

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
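
    # Usage sketch (illustrative, not part of the parser source): a COMMA in a
    # FROM list is folded into a bare exp.Join by the first branch above, and
    # OUTER APPLY is normalized to a LEFT side. Assuming sqlglot.parse_one:
    #
    #     >>> import sqlglot
    #     >>> select = sqlglot.parse_one("SELECT * FROM a, b")
    #     >>> [(j.side, j.kind, j.this.name) for j in select.args["joins"]]
    #     [('', '', 'b')]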

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
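
    # Usage sketch (illustrative, not part of the parser source): dotted names
    # are unpacked into table/db/catalog parts, with any further parts nested
    # as exp.Dot. Assuming the public sqlglot.parse_one API:
    #
    #     >>> import sqlglot
    #     >>> table = sqlglot.parse_one("SELECT * FROM c.d.t").find(sqlglot.exp.Table)
    #     >>> table.name, table.text("db"), table.text("catalog")
    #     ('t', 'd', 'c')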

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )
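
    # Usage sketch (illustrative, not part of the parser source). For dialects
    # with UNNEST_COLUMN_ONLY set (e.g. BigQuery) the table alias is re-homed
    # into the column list, and WITH OFFSET adds an offset identifier:
    #
    #     >>> import sqlglot
    #     >>> sql = "SELECT x FROM UNNEST([1, 2]) AS x WITH OFFSET AS pos"
    #     >>> unnest = sqlglot.parse_one(sql, read="bigquery").find(sqlglot.exp.Unnest)
    #     >>> unnest.args["offset"].name
    #     'pos'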

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )
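
    # Usage sketch (illustrative, not part of the parser source), following the
    # DuckDB PIVOT statement linked above. Assuming the public sqlglot API:
    #
    #     >>> import sqlglot
    #     >>> parsed = sqlglot.parse_one(
    #     ...     "SELECT * FROM (PIVOT cities ON year USING SUM(population))",
    #     ...     read="duckdb",
    #     ... )
    #     >>> parsed.find(sqlglot.exp.Pivot) is not None
    #     True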

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)
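
    # Usage sketch (illustrative, not part of the parser source): MySQL's
    # "LIMIT <offset>, <count>" form is captured by the COMMA branch in
    # _parse_limit above, and _parse_query_modifiers later pops the offset out
    # into its own exp.Offset node. Assuming the public sqlglot.parse_one API:
    #
    #     >>> import sqlglot
    #     >>> select = sqlglot.parse_one("SELECT * FROM t LIMIT 5, 10", read="mysql")
    #     >>> select.args["limit"].expression.sql(), select.args["offset"].expression.sql()
    #     ('10', '5')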

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )
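
    # Usage sketch (illustrative, not part of the parser source): because the
    # right-hand side is parsed with a recursive _parse_set_operations call,
    # chained set operations nest to the right. Assuming sqlglot.parse_one:
    #
    #     >>> import sqlglot
    #     >>> union = sqlglot.parse_one("SELECT 1 UNION SELECT 2 UNION SELECT 3")
    #     >>> isinstance(union.expression, sqlglot.exp.Union)
    #     True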

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)
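
    # Usage sketch (illustrative, not part of the parser source): per the
    # canonicalization above, a two-part interval string is split into a
    # numeric string literal plus a unit Var. Assuming sqlglot.parse_one:
    #
    #     >>> import sqlglot
    #     >>> interval = sqlglot.parse_one("INTERVAL '5 day'")
    #     >>> interval.this.sql(), interval.text("unit")
    #     ("'5'", 'day')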

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    return identifier
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            if self._match_text_seq("YEAR", "TO", "MONTH"):
                span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()]
            elif self._match_text_seq("DAY", "TO", "SECOND"):
                span = [exp.IntervalDayToSecondSpan()]
            else:
                span = None

            unit = not span and self._parse_var()
            if not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
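
    # Usage sketch (illustrative, not part of the parser source): type
    # arguments become DataTypeParam expressions and angle-bracket types are
    # flagged as nested. Assuming the public exp.DataType.build helper:
    #
    #     >>> from sqlglot import exp
    #     >>> exp.DataType.build("DECIMAL(10, 2)").sql()
    #     'DECIMAL(10, 2)'
    #     >>> exp.DataType.build("ARRAY<INT>").args["nested"]
    #     True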

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this
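
    # Usage sketch (illustrative, not part of the parser source): dots extend
    # an exp.Column into table/db parts, while operators from COLUMN_OPERATORS
    # (e.g. ::) build their own nodes. Assuming sqlglot.parse_one:
    #
    #     >>> import sqlglot
    #     >>> col = sqlglot.parse_one("SELECT a.b.c").expressions[0]
    #     >>> col.name, col.table, col.db
    #     ('c', 'b', 'a')
    #     >>> sqlglot.parse_one("SELECT x::INT").expressions[0].sql()
    #     'CAST(x AS INT)'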

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
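
    # Usage sketch (illustrative, not part of the parser source): names with a
    # registered builder go through FUNCTIONS / FUNCTION_PARSERS, while anything
    # else falls back to exp.Anonymous. MY_UDF below is a hypothetical name:
    #
    #     >>> import sqlglot
    #     >>> func = sqlglot.parse_one("SELECT MY_UDF(x, 1)").expressions[0]
    #     >>> isinstance(func, sqlglot.exp.Anonymous), func.name
    #     (True, 'MY_UDF')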

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
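
    # Usage sketch (illustrative, not part of the parser source): a column
    # definition combines the parsed type with any trailing constraints.
    # Assuming the public sqlglot.parse_one API:
    #
    #     >>> import sqlglot
    #     >>> coldef = sqlglot.parse_one(
    #     ...     "CREATE TABLE t (x INT NOT NULL)"
    #     ... ).find(sqlglot.exp.ColumnDef)
    #     >>> coldef.kind.sql(), [c.sql() for c in coldef.args["constraints"]]
    #     ('INT', ['NOT NULL'])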

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)
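
    # Usage sketch (illustrative, not part of the parser source): subscripts
    # become exp.Bracket nodes, with literal indices shifted by the dialect's
    # INDEX_OFFSET via apply_index_offset so the AST is offset-normalized,
    # e.g. roughly (assuming the public sqlglot.transpile API):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.transpile("SELECT x[1]", read="presto", write="hive")[0]
    #     'SELECT x[0]'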

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
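
    # Usage sketch (illustrative, not part of the parser source): EXTRACT
    # accepts either the "unit FROM expr" or the comma form; both produce an
    # exp.Extract. Assuming the public sqlglot.parse_one API:
    #
    #     >>> import sqlglot
    #     >>> ext = sqlglot.parse_one("SELECT EXTRACT(month FROM d)").expressions[0]
    #     >>> ext.this.sql(), ext.expression.sql()
    #     ('month', 'd')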
self._parse_at_time_zone(fmt_string) 3937 3938 if to.this in exp.DataType.TEMPORAL_TYPES: 3939 this = self.expression( 3940 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3941 this=this, 3942 format=exp.Literal.string( 3943 format_time( 3944 fmt_string.this if fmt_string else "", 3945 self.FORMAT_MAPPING or self.TIME_MAPPING, 3946 self.FORMAT_TRIE or self.TIME_TRIE, 3947 ) 3948 ), 3949 ) 3950 3951 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3952 this.set("zone", fmt.args["zone"]) 3953 3954 return this 3955 3956 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3957 3958 def _parse_concat(self) -> t.Optional[exp.Expression]: 3959 args = self._parse_csv(self._parse_conjunction) 3960 if self.CONCAT_NULL_OUTPUTS_STRING: 3961 args = [ 3962 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3963 for arg in args 3964 if arg 3965 ] 3966 3967 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3968 # we find such a call we replace it with its argument. 3969 if len(args) == 1: 3970 return args[0] 3971 3972 return self.expression( 3973 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3974 ) 3975 3976 def _parse_string_agg(self) -> exp.Expression: 3977 if self._match(TokenType.DISTINCT): 3978 args: t.List[t.Optional[exp.Expression]] = [ 3979 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3980 ] 3981 if self._match(TokenType.COMMA): 3982 args.extend(self._parse_csv(self._parse_conjunction)) 3983 else: 3984 args = self._parse_csv(self._parse_conjunction) # type: ignore 3985 3986 index = self._index 3987 if not self._match(TokenType.R_PAREN) and args: 3988 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3989 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 3990 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 3991 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 3992 3993 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3994 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3995 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
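    # Sketch of the rewrite above, not in the original source: when
    # CONCAT_NULL_OUTPUTS_STRING is set, CONCAT(a, b) is rewritten to
    # CONCAT(COALESCE(CAST(a AS TEXT), ''), COALESCE(CAST(b AS TEXT), ''))
    # so NULL arguments behave like empty strings, and a single remaining
    # argument collapses to itself for dialects such as Trino.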
    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
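    # Illustrative translation, not in the original source:
    #   DECODE(x, 1, 'one', NULL, 'none', 'other')
    # is parsed into the equivalent of
    #   CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END
    # with the trailing odd argument becoming the CASE default.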
"JSON") 4096 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4097 4098 return self.expression( 4099 exp.JSONObject, 4100 expressions=expressions, 4101 null_handling=null_handling, 4102 unique_keys=unique_keys, 4103 return_type=return_type, 4104 format_json=format_json, 4105 encoding=encoding, 4106 ) 4107 4108 def _parse_logarithm(self) -> exp.Func: 4109 # Default argument order is base, expression 4110 args = self._parse_csv(self._parse_range) 4111 4112 if len(args) > 1: 4113 if not self.LOG_BASE_FIRST: 4114 args.reverse() 4115 return exp.Log.from_arg_list(args) 4116 4117 return self.expression( 4118 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4119 ) 4120 4121 def _parse_match_against(self) -> exp.MatchAgainst: 4122 expressions = self._parse_csv(self._parse_column) 4123 4124 self._match_text_seq(")", "AGAINST", "(") 4125 4126 this = self._parse_string() 4127 4128 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4129 modifier = "IN NATURAL LANGUAGE MODE" 4130 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4131 modifier = f"{modifier} WITH QUERY EXPANSION" 4132 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4133 modifier = "IN BOOLEAN MODE" 4134 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4135 modifier = "WITH QUERY EXPANSION" 4136 else: 4137 modifier = None 4138 4139 return self.expression( 4140 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4141 ) 4142 4143 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4144 def _parse_open_json(self) -> exp.OpenJSON: 4145 this = self._parse_bitwise() 4146 path = self._match(TokenType.COMMA) and self._parse_string() 4147 4148 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4149 this = self._parse_field(any_token=True) 4150 kind = self._parse_types() 4151 path = self._parse_string() 4152 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4153 4154 return self.expression( 4155 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4156 ) 4157 4158 expressions = None 4159 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4160 self._match_l_paren() 4161 expressions = self._parse_csv(_parse_open_json_column_def) 4162 4163 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4164 4165 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4166 args = self._parse_csv(self._parse_bitwise) 4167 4168 if self._match(TokenType.IN): 4169 return self.expression( 4170 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4171 ) 4172 4173 if haystack_first: 4174 haystack = seq_get(args, 0) 4175 needle = seq_get(args, 1) 4176 else: 4177 needle = seq_get(args, 0) 4178 haystack = seq_get(args, 1) 4179 4180 return self.expression( 4181 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4182 ) 4183 4184 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4185 args = self._parse_csv(self._parse_table) 4186 return exp.JoinHint(this=func_name.upper(), expressions=args) 4187 4188 def _parse_substring(self) -> exp.Substring: 4189 # Postgres supports the form: substring(string [from int] [for int]) 4190 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4191 4192 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4193 4194 if self._match(TokenType.FROM): 4195 args.append(self._parse_bitwise()) 4196 if self._match(TokenType.FOR): 
4197 args.append(self._parse_bitwise()) 4198 4199 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4200 4201 def _parse_trim(self) -> exp.Trim: 4202 # https://www.w3resource.com/sql/character-functions/trim.php 4203 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4204 4205 position = None 4206 collation = None 4207 4208 if self._match_texts(self.TRIM_TYPES): 4209 position = self._prev.text.upper() 4210 4211 expression = self._parse_bitwise() 4212 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4213 this = self._parse_bitwise() 4214 else: 4215 this = expression 4216 expression = None 4217 4218 if self._match(TokenType.COLLATE): 4219 collation = self._parse_bitwise() 4220 4221 return self.expression( 4222 exp.Trim, this=this, position=position, expression=expression, collation=collation 4223 ) 4224 4225 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4226 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4227 4228 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4229 return self._parse_window(self._parse_id_var(), alias=True) 4230 4231 def _parse_respect_or_ignore_nulls( 4232 self, this: t.Optional[exp.Expression] 4233 ) -> t.Optional[exp.Expression]: 4234 if self._match_text_seq("IGNORE", "NULLS"): 4235 return self.expression(exp.IgnoreNulls, this=this) 4236 if self._match_text_seq("RESPECT", "NULLS"): 4237 return self.expression(exp.RespectNulls, this=this) 4238 return this 4239 4240 def _parse_window( 4241 self, this: t.Optional[exp.Expression], alias: bool = False 4242 ) -> t.Optional[exp.Expression]: 4243 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4244 self._match(TokenType.WHERE) 4245 this = self.expression( 4246 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4247 ) 4248 self._match_r_paren() 4249 4250 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4251 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4252 if self._match_text_seq("WITHIN", "GROUP"): 4253 order = self._parse_wrapped(self._parse_order) 4254 this = self.expression(exp.WithinGroup, this=this, expression=order) 4255 4256 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4257 # Some dialects choose to implement and some do not. 4258 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4259 4260 # There is some code above in _parse_lambda that handles 4261 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4262 4263 # The below changes handle 4264 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4265 4266 # Oracle allows both formats 4267 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4268 # and Snowflake chose to do the same for familiarity 4269 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4270 this = self._parse_respect_or_ignore_nulls(this) 4271 4272 # bigquery select from window x AS (partition by ...) 
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }
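    # Worked example, not in the original source: for the frame clause in
    #   SUM(x) OVER (ORDER BY y ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)
    # _parse_window_spec is called twice and returns roughly
    #   {"value": exp.Literal.number(1), "side": "PRECEDING"}   # frame start
    #   {"value": "CURRENT ROW", "side": False}                 # frame end
    # which _parse_window folds into a single exp.WindowSpec node.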
    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items
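    # Sketch, not in the original source: _parse_csv is the generic
    # separator-delimited list helper, so _parse_csv(self._parse_id_var) over
    # the tokens for "a, b, c" yields [Identifier(a), Identifier(b),
    # Identifier(c)], silently dropping any item whose parse method returns None.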
    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()

        text = self._find_sql(start, self._prev)
        size = len(start.text)

        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None
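    # Illustrative walk, not in the original source: with a dialect whose
    # SHOW_PARSERS contains the key "SHOW TABLES", _find_parser consumes the
    # tokens SHOW and TABLES one by one, following SHOW_TRIE via in_trie
    # (TrieResult.PREFIX after "SHOW", TrieResult.EXISTS after "TABLES"), and
    # returns parsers["SHOW TABLES"]; on TrieResult.FAILED it retreats to the
    # saved index and returns None.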
    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)

        return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()
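A minimal end-to-end sketch, not part of the generated docs (it assumes the base dialect): tokens come from the parser's companion Tokenizer, and the constructor options control how parse errors are surfaced.

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    tokens = Tokenizer().tokenize(sql)

    # Collect up to 5 errors and raise them together, instead of raising immediately.
    parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)
    expressions = parser.parse(tokens, sql)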
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
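For instance, a short sketch (reusing the imports from the constructor example above): parsing two statements yields two trees.

    tokens = Tokenizer().tokenize("SELECT 1; SELECT 2")
    trees = Parser().parse(tokens, "SELECT 1; SELECT 2")
    assert len(trees) == 2  # one syntax tree per SQL statement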
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
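A hedged sketch of the fallback behavior, assuming exp.Table and exp.Column are both registered in EXPRESSION_PARSERS (as they are for the base parser):

    from sqlglot import exp

    sql = "foo.bar"
    tokens = Tokenizer().tokenize(sql)
    # Try "foo.bar" as a table reference first, then fall back to a column.
    tree = Parser().parse_into((exp.Table, exp.Column), tokens, sql)[0]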
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
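Sketch of the two non-default levels (the SQL and the resulting message are illustrative):

    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize("SELECT CAST(1 INT)"))
    # WARN: the recorded "Expected AS after CAST" error is logged through the
    # "sqlglot" logger and a best-effort tree is still returned. With RAISE,
    # check_errors would merge all recorded errors into one ParseError instead.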
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
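The structured fields passed to ParseError.new above can be inspected on a caught error; a small sketch with the default IMMEDIATE level:

    from sqlglot.errors import ParseError

    try:
        Parser().parse(Tokenizer().tokenize("SELECT CAST(1 INT)"), "SELECT CAST(1 INT)")
    except ParseError as e:
        info = e.errors[0]  # context assembled by raise_error
        print(info["description"], info["line"], info["col"], info["highlight"])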
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
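This is the main building block for parser subclasses; a hypothetical sketch (the method name and GREATEST handling are invented for illustration):

    class MyParser(Parser):
        def _parse_greatest(self) -> exp.Expression:
            # expression() attaches pending comments and validates required args.
            args = self._parse_csv(self._parse_conjunction)
            return self.expression(exp.Greatest, this=seq_get(args, 0), expressions=args[1:])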
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
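A small sketch of the validation path, assuming exp.If (whose condition argument, "this", is mandatory):

    parser = Parser(error_level=ErrorLevel.RAISE)
    parser.validate_expression(exp.If(true=exp.Literal.number(1)))  # records an error
    parser.check_errors()  # raises ParseError: the required 'this' argument is missing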