sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
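
    # A minimal usage sketch; the top-level sqlglot.parse / parse_one helpers
    # wrap this flow, but the parser can be driven directly:
    #
    #     from sqlglot import Parser, Tokenizer
    #
    #     tokens = Tokenizer().tokenize("SELECT a FROM t")
    #     Parser().parse(tokens)  # -> one syntax tree per statement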

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }
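
    # These token -> expression maps drive a precedence-climbing descent:
    # _parse_conjunction delegates to _parse_equality, then _parse_comparison,
    # and so on down to the FACTOR operators below, which bind tightest.
    # A sketch of the resulting shape:
    #
    #     a + b * c  ->  exp.Add(this=a, expression=exp.Mul(this=b, expression=c))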

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
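
    # Postfix operators applied while parsing a column reference; a sketch:
    #
    #     x::INT         ->  exp.Cast (or exp.TryCast when STRICT_CAST is False)
    #     data -> '$.a'  ->  exp.JSONExtract(this=data, expression='$.a')
    #     data ->> '$.a' ->  exp.JSONExtractScalar(this=data, expression='$.a')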

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }
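
    # _parse_statement (below) dispatches on the first token through
    # STATEMENT_PARSERS, so "DROP TABLE t" routes to _parse_drop and
    # "UPDATE t SET ..." to _parse_update; unmatched input falls through
    # to generic expression / SELECT parsing.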

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }
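
    # Range operators receive the already-parsed left-hand side; a sketch:
    #
    #     x BETWEEN 1 AND 2       ->  exp.Between(this=x, low=1, high=2)
    #     x LIKE 'a%' ESCAPE '\'  ->  exp.Escape(this=exp.Like(...), expression='\')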

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
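
    # DDL properties are dispatched by keyword; a sketch:
    #
    #     CREATE TABLE t (x INT) ENGINE=MergeTree
    #       ->  properties include exp.EngineProperty via _parse_property_assignment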

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    }
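
    # FUNCTION_PARSERS overrides generic call parsing for functions whose
    # argument grammar is irregular; a sketch:
    #
    #     CAST(x AS INT)        ->  _parse_cast  ->  exp.Cast(this=x, to=INT)
    #     EXTRACT(YEAR FROM d)  ->  _parse_extract
    #     TRIM(BOTH ' ' FROM s) ->  _parse_trim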
"LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 787 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 788 "TRANSACTION": lambda self: self._parse_set_transaction(), 789 } 790 791 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 792 793 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 794 795 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 796 797 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 798 799 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 800 801 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 802 TRANSACTION_CHARACTERISTICS = { 803 "ISOLATION LEVEL REPEATABLE READ", 804 "ISOLATION LEVEL READ COMMITTED", 805 "ISOLATION LEVEL READ UNCOMMITTED", 806 "ISOLATION LEVEL SERIALIZABLE", 807 "READ WRITE", 808 "READ ONLY", 809 } 810 811 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 812 813 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 814 815 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 816 817 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 818 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 819 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 820 PARTITION_BY_TOKENS = {TokenType.PARTITION_BY} 821 822 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 823 824 STRICT_CAST = True 825 826 # A NULL arg in CONCAT yields NULL by default 827 CONCAT_NULL_OUTPUTS_STRING = False 828 829 PREFIXED_PIVOT_COLUMNS = False 830 IDENTIFY_PIVOT_STRINGS = False 831 832 LOG_BASE_FIRST = True 833 LOG_DEFAULTS_TO_LN = False 834 835 __slots__ = ( 836 "error_level", 837 "error_message_context", 838 "max_errors", 839 "sql", 840 "errors", 841 "_tokens", 842 "_index", 843 "_curr", 844 "_next", 845 "_prev", 846 "_prev_comments", 847 ) 848 849 # Autofilled 850 INDEX_OFFSET: int = 0 851 UNNEST_COLUMN_ONLY: bool = False 852 ALIAS_POST_TABLESAMPLE: bool = False 853 STRICT_STRING_CONCAT = False 854 NORMALIZE_FUNCTIONS = "upper" 855 NULL_ORDERING: str = "nulls_are_small" 856 SHOW_TRIE: t.Dict = {} 857 SET_TRIE: t.Dict = {} 858 FORMAT_MAPPING: t.Dict[str, str] = {} 859 FORMAT_TRIE: t.Dict = {} 860 TIME_MAPPING: t.Dict[str, str] = {} 861 TIME_TRIE: t.Dict = {} 862 863 def __init__( 864 self, 865 error_level: t.Optional[ErrorLevel] = None, 866 error_message_context: int = 100, 867 max_errors: int = 3, 868 ): 869 self.error_level = error_level or ErrorLevel.IMMEDIATE 870 self.error_message_context = error_message_context 871 self.max_errors = max_errors 872 self.reset() 873 874 def reset(self): 875 self.sql = "" 876 self.errors = [] 877 self._tokens = [] 878 self._index = 0 879 self._curr = None 880 self._next = None 881 self._prev = None 882 self._prev_comments = None 883 884 def parse( 885 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 886 ) -> t.List[t.Optional[exp.Expression]]: 887 """ 888 Parses a list of tokens and returns a list of syntax trees, one tree 889 per parsed SQL statement. 890 891 Args: 892 raw_tokens: The list of tokens. 893 sql: The original SQL string, used to produce helpful debug messages. 894 895 Returns: 896 The list of the produced syntax trees. 
897 """ 898 return self._parse( 899 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 900 ) 901 902 def parse_into( 903 self, 904 expression_types: exp.IntoType, 905 raw_tokens: t.List[Token], 906 sql: t.Optional[str] = None, 907 ) -> t.List[t.Optional[exp.Expression]]: 908 """ 909 Parses a list of tokens into a given Expression type. If a collection of Expression 910 types is given instead, this method will try to parse the token list into each one 911 of them, stopping at the first for which the parsing succeeds. 912 913 Args: 914 expression_types: The expression type(s) to try and parse the token list into. 915 raw_tokens: The list of tokens. 916 sql: The original SQL string, used to produce helpful debug messages. 917 918 Returns: 919 The target Expression. 920 """ 921 errors = [] 922 for expression_type in ensure_list(expression_types): 923 parser = self.EXPRESSION_PARSERS.get(expression_type) 924 if not parser: 925 raise TypeError(f"No parser registered for {expression_type}") 926 927 try: 928 return self._parse(parser, raw_tokens, sql) 929 except ParseError as e: 930 e.errors[0]["into_expression"] = expression_type 931 errors.append(e) 932 933 raise ParseError( 934 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 935 errors=merge_errors(errors), 936 ) from errors[-1] 937 938 def _parse( 939 self, 940 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 941 raw_tokens: t.List[Token], 942 sql: t.Optional[str] = None, 943 ) -> t.List[t.Optional[exp.Expression]]: 944 self.reset() 945 self.sql = sql or "" 946 947 total = len(raw_tokens) 948 chunks: t.List[t.List[Token]] = [[]] 949 950 for i, token in enumerate(raw_tokens): 951 if token.token_type == TokenType.SEMICOLON: 952 if i < total - 1: 953 chunks.append([]) 954 else: 955 chunks[-1].append(token) 956 957 expressions = [] 958 959 for tokens in chunks: 960 self._index = -1 961 self._tokens = tokens 962 self._advance() 963 964 expressions.append(parse_method(self)) 965 966 if self._index < len(self._tokens): 967 self.raise_error("Invalid expression / Unexpected token") 968 969 self.check_errors() 970 971 return expressions 972 973 def check_errors(self) -> None: 974 """Logs or raises any found errors, depending on the chosen error level setting.""" 975 if self.error_level == ErrorLevel.WARN: 976 for error in self.errors: 977 logger.error(str(error)) 978 elif self.error_level == ErrorLevel.RAISE and self.errors: 979 raise ParseError( 980 concat_messages(self.errors, self.max_errors), 981 errors=merge_errors(self.errors), 982 ) 983 984 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 985 """ 986 Appends an error in the list of recorded errors or raises it, depending on the chosen 987 error level setting. 988 """ 989 token = token or self._curr or self._prev or Token.string("") 990 start = token.start 991 end = token.end + 1 992 start_context = self.sql[max(start - self.error_message_context, 0) : start] 993 highlight = self.sql[start:end] 994 end_context = self.sql[end : end + self.error_message_context] 995 996 error = ParseError.new( 997 f"{message}. 

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
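
    # e.g. "DROP TABLE IF EXISTS db.t CASCADE" parses to (a sketch):
    #
    #     exp.Drop(kind="TABLE", exists=True, cascade=True,
    #              this=exp.Table(this=t, db=db))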

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
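
    # e.g. "CREATE OR REPLACE VIEW v AS SELECT 1" parses to (a sketch):
    #
    #     exp.Create(kind="VIEW", replace=True, this=exp.Table(this=v),
    #                expression=exp.Select(...))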

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match_set(self.PARTITION_BY_TOKENS):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
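
    # e.g. Hive-style "PARTITIONED BY (ds STRING)" parses the parenthesized
    # column list via _parse_schema, yielding (a sketch):
    #
    #     exp.PartitionedByProperty(this=exp.Schema(expressions=[ds STRING]))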

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)
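
    # e.g. "INSERT OR REPLACE INTO t ..." (SQLite) records alternative="REPLACE",
    # while Hive's "INSERT OVERWRITE TABLE t ..." sets overwrite=True on the
    # resulting exp.Insert (a sketch of the flags gathered below).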
exists=self._parse_exists(), 1751 partition=self._parse_partition(), 1752 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1753 and self._parse_conjunction(), 1754 expression=self._parse_ddl_select(), 1755 conflict=self._parse_on_conflict(), 1756 returning=returning or self._parse_returning(), 1757 overwrite=overwrite, 1758 alternative=alternative, 1759 ignore=ignore, 1760 ) 1761 1762 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1763 conflict = self._match_text_seq("ON", "CONFLICT") 1764 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1765 1766 if not conflict and not duplicate: 1767 return None 1768 1769 nothing = None 1770 expressions = None 1771 key = None 1772 constraint = None 1773 1774 if conflict: 1775 if self._match_text_seq("ON", "CONSTRAINT"): 1776 constraint = self._parse_id_var() 1777 else: 1778 key = self._parse_csv(self._parse_value) 1779 1780 self._match_text_seq("DO") 1781 if self._match_text_seq("NOTHING"): 1782 nothing = True 1783 else: 1784 self._match(TokenType.UPDATE) 1785 self._match(TokenType.SET) 1786 expressions = self._parse_csv(self._parse_equality) 1787 1788 return self.expression( 1789 exp.OnConflict, 1790 duplicate=duplicate, 1791 expressions=expressions, 1792 nothing=nothing, 1793 key=key, 1794 constraint=constraint, 1795 ) 1796 1797 def _parse_returning(self) -> t.Optional[exp.Returning]: 1798 if not self._match(TokenType.RETURNING): 1799 return None 1800 return self.expression( 1801 exp.Returning, 1802 expressions=self._parse_csv(self._parse_expression), 1803 into=self._match(TokenType.INTO) and self._parse_table_part(), 1804 ) 1805 1806 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1807 if not self._match(TokenType.FORMAT): 1808 return None 1809 return self._parse_row_format() 1810 1811 def _parse_row_format( 1812 self, match_row: bool = False 1813 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1814 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1815 return None 1816 1817 if self._match_text_seq("SERDE"): 1818 this = self._parse_string() 1819 1820 serde_properties = None 1821 if self._match(TokenType.SERDE_PROPERTIES): 1822 serde_properties = self.expression( 1823 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1824 ) 1825 1826 return self.expression( 1827 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1828 ) 1829 1830 self._match_text_seq("DELIMITED") 1831 1832 kwargs = {} 1833 1834 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1835 kwargs["fields"] = self._parse_string() 1836 if self._match_text_seq("ESCAPED", "BY"): 1837 kwargs["escaped"] = self._parse_string() 1838 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1839 kwargs["collection_items"] = self._parse_string() 1840 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1841 kwargs["map_keys"] = self._parse_string() 1842 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1843 kwargs["lines"] = self._parse_string() 1844 if self._match_text_seq("NULL", "DEFINED", "AS"): 1845 kwargs["null"] = self._parse_string() 1846 1847 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1848 1849 def _parse_load(self) -> exp.LoadData | exp.Command: 1850 if self._match_text_seq("DATA"): 1851 local = self._match_text_seq("LOCAL") 1852 self._match_text_seq("INPATH") 1853 inpath = self._parse_string() 1854 overwrite = self._match(TokenType.OVERWRITE) 1855 
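# ----------------------------------------------------------------------
# Illustrative example, not part of the module: how the INSERT clauses
# parsed above surface in the AST. A minimal sketch assuming sqlglot is
# installed; parse_one is the public entry point that drives this parser
# (exact node reprs vary by version).
import sqlglot

insert = sqlglot.parse_one(
    "INSERT INTO t (a) VALUES (1) ON CONFLICT DO NOTHING RETURNING a",
    read="postgres",
)
print(insert.args["conflict"])   # exp.OnConflict with nothing=True
print(insert.args["returning"])  # exp.Returning built by _parse_returning
# ----------------------------------------------------------------------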
self._match_pair(TokenType.INTO, TokenType.TABLE) 1856 1857 return self.expression( 1858 exp.LoadData, 1859 this=self._parse_table(schema=True), 1860 local=local, 1861 overwrite=overwrite, 1862 inpath=inpath, 1863 partition=self._parse_partition(), 1864 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1865 serde=self._match_text_seq("SERDE") and self._parse_string(), 1866 ) 1867 return self._parse_as_command(self._prev) 1868 1869 def _parse_delete(self) -> exp.Delete: 1870 # This handles MySQL's "Multiple-Table Syntax" 1871 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1872 tables = None 1873 comments = self._prev_comments 1874 if not self._match(TokenType.FROM, advance=False): 1875 tables = self._parse_csv(self._parse_table) or None 1876 1877 returning = self._parse_returning() 1878 1879 return self.expression( 1880 exp.Delete, 1881 comments=comments, 1882 tables=tables, 1883 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1884 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1885 where=self._parse_where(), 1886 returning=returning or self._parse_returning(), 1887 limit=self._parse_limit(), 1888 ) 1889 1890 def _parse_update(self) -> exp.Update: 1891 comments = self._prev_comments 1892 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1893 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1894 returning = self._parse_returning() 1895 return self.expression( 1896 exp.Update, 1897 comments=comments, 1898 **{ # type: ignore 1899 "this": this, 1900 "expressions": expressions, 1901 "from": self._parse_from(joins=True), 1902 "where": self._parse_where(), 1903 "returning": returning or self._parse_returning(), 1904 "limit": self._parse_limit(), 1905 }, 1906 ) 1907 1908 def _parse_uncache(self) -> exp.Uncache: 1909 if not self._match(TokenType.TABLE): 1910 self.raise_error("Expecting TABLE after UNCACHE") 1911 1912 return self.expression( 1913 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1914 ) 1915 1916 def _parse_cache(self) -> exp.Cache: 1917 lazy = self._match_text_seq("LAZY") 1918 self._match(TokenType.TABLE) 1919 table = self._parse_table(schema=True) 1920 1921 options = [] 1922 if self._match_text_seq("OPTIONS"): 1923 self._match_l_paren() 1924 k = self._parse_string() 1925 self._match(TokenType.EQ) 1926 v = self._parse_string() 1927 options = [k, v] 1928 self._match_r_paren() 1929 1930 self._match(TokenType.ALIAS) 1931 return self.expression( 1932 exp.Cache, 1933 this=table, 1934 lazy=lazy, 1935 options=options, 1936 expression=self._parse_select(nested=True), 1937 ) 1938 1939 def _parse_partition(self) -> t.Optional[exp.Partition]: 1940 if not self._match(TokenType.PARTITION): 1941 return None 1942 1943 return self.expression( 1944 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1945 ) 1946 1947 def _parse_value(self) -> exp.Tuple: 1948 if self._match(TokenType.L_PAREN): 1949 expressions = self._parse_csv(self._parse_conjunction) 1950 self._match_r_paren() 1951 return self.expression(exp.Tuple, expressions=expressions) 1952 1953 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
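# ----------------------------------------------------------------------
# Illustrative example, not part of the module: the MySQL multiple-table
# DELETE handled by _parse_delete above. A minimal sketch assuming
# sqlglot is installed.
import sqlglot

delete = sqlglot.parse_one(
    "DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id", read="mysql"
)
print(delete.args["tables"])  # the tables named before FROM
print(delete.this)            # the FROM target, with its join attached
# ----------------------------------------------------------------------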
1954 # https://prestodb.io/docs/current/sql/values.html 1955 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1956 1957 def _parse_projections(self) -> t.List[t.Optional[exp.Expression]]: 1958 return self._parse_expressions() 1959 1960 def _parse_select( 1961 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1962 ) -> t.Optional[exp.Expression]: 1963 cte = self._parse_with() 1964 if cte: 1965 this = self._parse_statement() 1966 1967 if not this: 1968 self.raise_error("Failed to parse any statement following CTE") 1969 return cte 1970 1971 if "with" in this.arg_types: 1972 this.set("with", cte) 1973 else: 1974 self.raise_error(f"{this.key} does not support CTE") 1975 this = cte 1976 elif self._match(TokenType.SELECT): 1977 comments = self._prev_comments 1978 1979 hint = self._parse_hint() 1980 all_ = self._match(TokenType.ALL) 1981 distinct = self._match(TokenType.DISTINCT) 1982 1983 kind = ( 1984 self._match(TokenType.ALIAS) 1985 and self._match_texts(("STRUCT", "VALUE")) 1986 and self._prev.text 1987 ) 1988 1989 if distinct: 1990 distinct = self.expression( 1991 exp.Distinct, 1992 on=self._parse_value() if self._match(TokenType.ON) else None, 1993 ) 1994 1995 if all_ and distinct: 1996 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1997 1998 limit = self._parse_limit(top=True) 1999 projections = self._parse_projections() 2000 2001 this = self.expression( 2002 exp.Select, 2003 kind=kind, 2004 hint=hint, 2005 distinct=distinct, 2006 expressions=projections, 2007 limit=limit, 2008 ) 2009 this.comments = comments 2010 2011 into = self._parse_into() 2012 if into: 2013 this.set("into", into) 2014 2015 from_ = self._parse_from() 2016 if from_: 2017 this.set("from", from_) 2018 2019 this = self._parse_query_modifiers(this) 2020 elif (table or nested) and self._match(TokenType.L_PAREN): 2021 if self._match(TokenType.PIVOT): 2022 this = self._parse_simplified_pivot() 2023 elif self._match(TokenType.FROM): 2024 this = exp.select("*").from_( 2025 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2026 ) 2027 else: 2028 this = self._parse_table() if table else self._parse_select(nested=True) 2029 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2030 2031 self._match_r_paren() 2032 2033 # We return early here so that the UNION isn't attached to the subquery by the 2034 # following call to _parse_set_operations, but instead becomes the parent node 2035 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2036 elif self._match(TokenType.VALUES): 2037 this = self.expression( 2038 exp.Values, 2039 expressions=self._parse_csv(self._parse_value), 2040 alias=self._parse_table_alias(), 2041 ) 2042 else: 2043 this = None 2044 2045 return self._parse_set_operations(this) 2046 2047 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2048 if not skip_with_token and not self._match(TokenType.WITH): 2049 return None 2050 2051 comments = self._prev_comments 2052 recursive = self._match(TokenType.RECURSIVE) 2053 2054 expressions = [] 2055 while True: 2056 expressions.append(self._parse_cte()) 2057 2058 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2059 break 2060 else: 2061 self._match(TokenType.WITH) 2062 2063 return self.expression( 2064 exp.With, comments=comments, expressions=expressions, recursive=recursive 2065 ) 2066 2067 def _parse_cte(self) -> exp.CTE: 2068 alias = self._parse_table_alias() 2069 if not alias or not alias.this: 2070 
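# ----------------------------------------------------------------------
# Illustrative example, not part of the module: _parse_select attaches a
# leading WITH to the statement that follows it via this.set("with", cte).
# A minimal sketch assuming sqlglot is installed.
import sqlglot

q = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
print(q.args["with"].expressions)  # a list with one exp.CTE named c
# ----------------------------------------------------------------------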
self.raise_error("Expected CTE to have alias") 2071 2072 self._match(TokenType.ALIAS) 2073 return self.expression( 2074 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2075 ) 2076 2077 def _parse_table_alias( 2078 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2079 ) -> t.Optional[exp.TableAlias]: 2080 any_token = self._match(TokenType.ALIAS) 2081 alias = ( 2082 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2083 or self._parse_string_as_identifier() 2084 ) 2085 2086 index = self._index 2087 if self._match(TokenType.L_PAREN): 2088 columns = self._parse_csv(self._parse_function_parameter) 2089 self._match_r_paren() if columns else self._retreat(index) 2090 else: 2091 columns = None 2092 2093 if not alias and not columns: 2094 return None 2095 2096 return self.expression(exp.TableAlias, this=alias, columns=columns) 2097 2098 def _parse_subquery( 2099 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2100 ) -> t.Optional[exp.Subquery]: 2101 if not this: 2102 return None 2103 2104 return self.expression( 2105 exp.Subquery, 2106 this=this, 2107 pivots=self._parse_pivots(), 2108 alias=self._parse_table_alias() if parse_alias else None, 2109 ) 2110 2111 def _parse_query_modifiers( 2112 self, this: t.Optional[exp.Expression] 2113 ) -> t.Optional[exp.Expression]: 2114 if isinstance(this, self.MODIFIABLES): 2115 for join in iter(self._parse_join, None): 2116 this.append("joins", join) 2117 for lateral in iter(self._parse_lateral, None): 2118 this.append("laterals", lateral) 2119 2120 while True: 2121 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2122 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2123 key, expression = parser(self) 2124 2125 if expression: 2126 this.set(key, expression) 2127 if key == "limit": 2128 offset = expression.args.pop("offset", None) 2129 if offset: 2130 this.set("offset", exp.Offset(expression=offset)) 2131 continue 2132 break 2133 return this 2134 2135 def _parse_hint(self) -> t.Optional[exp.Hint]: 2136 if self._match(TokenType.HINT): 2137 hints = [] 2138 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2139 hints.extend(hint) 2140 2141 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2142 self.raise_error("Expected */ after HINT") 2143 2144 return self.expression(exp.Hint, expressions=hints) 2145 2146 return None 2147 2148 def _parse_into(self) -> t.Optional[exp.Into]: 2149 if not self._match(TokenType.INTO): 2150 return None 2151 2152 temp = self._match(TokenType.TEMPORARY) 2153 unlogged = self._match_text_seq("UNLOGGED") 2154 self._match(TokenType.TABLE) 2155 2156 return self.expression( 2157 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2158 ) 2159 2160 def _parse_from( 2161 self, joins: bool = False, skip_from_token: bool = False 2162 ) -> t.Optional[exp.From]: 2163 if not skip_from_token and not self._match(TokenType.FROM): 2164 return None 2165 2166 return self.expression( 2167 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2168 ) 2169 2170 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2171 if not self._match(TokenType.MATCH_RECOGNIZE): 2172 return None 2173 2174 self._match_l_paren() 2175 2176 partition = self._parse_partition_by() 2177 order = self._parse_order() 2178 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2179 2180 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2181 rows = 
exp.var("ONE ROW PER MATCH") 2182 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2183 text = "ALL ROWS PER MATCH" 2184 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2185 text += f" SHOW EMPTY MATCHES" 2186 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2187 text += f" OMIT EMPTY MATCHES" 2188 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2189 text += f" WITH UNMATCHED ROWS" 2190 rows = exp.var(text) 2191 else: 2192 rows = None 2193 2194 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2195 text = "AFTER MATCH SKIP" 2196 if self._match_text_seq("PAST", "LAST", "ROW"): 2197 text += f" PAST LAST ROW" 2198 elif self._match_text_seq("TO", "NEXT", "ROW"): 2199 text += f" TO NEXT ROW" 2200 elif self._match_text_seq("TO", "FIRST"): 2201 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2202 elif self._match_text_seq("TO", "LAST"): 2203 text += f" TO LAST {self._advance_any().text}" # type: ignore 2204 after = exp.var(text) 2205 else: 2206 after = None 2207 2208 if self._match_text_seq("PATTERN"): 2209 self._match_l_paren() 2210 2211 if not self._curr: 2212 self.raise_error("Expecting )", self._curr) 2213 2214 paren = 1 2215 start = self._curr 2216 2217 while self._curr and paren > 0: 2218 if self._curr.token_type == TokenType.L_PAREN: 2219 paren += 1 2220 if self._curr.token_type == TokenType.R_PAREN: 2221 paren -= 1 2222 2223 end = self._prev 2224 self._advance() 2225 2226 if paren > 0: 2227 self.raise_error("Expecting )", self._curr) 2228 2229 pattern = exp.var(self._find_sql(start, end)) 2230 else: 2231 pattern = None 2232 2233 define = ( 2234 self._parse_csv( 2235 lambda: self.expression( 2236 exp.Alias, 2237 alias=self._parse_id_var(any_token=True), 2238 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2239 ) 2240 ) 2241 if self._match_text_seq("DEFINE") 2242 else None 2243 ) 2244 2245 self._match_r_paren() 2246 2247 return self.expression( 2248 exp.MatchRecognize, 2249 partition_by=partition, 2250 order=order, 2251 measures=measures, 2252 rows=rows, 2253 after=after, 2254 pattern=pattern, 2255 define=define, 2256 alias=self._parse_table_alias(), 2257 ) 2258 2259 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2260 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2261 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2262 2263 if outer_apply or cross_apply: 2264 this = self._parse_select(table=True) 2265 view = None 2266 outer = not cross_apply 2267 elif self._match(TokenType.LATERAL): 2268 this = self._parse_select(table=True) 2269 view = self._match(TokenType.VIEW) 2270 outer = self._match(TokenType.OUTER) 2271 else: 2272 return None 2273 2274 if not this: 2275 this = ( 2276 self._parse_unnest() 2277 or self._parse_function() 2278 or self._parse_id_var(any_token=False) 2279 ) 2280 2281 while self._match(TokenType.DOT): 2282 this = exp.Dot( 2283 this=this, 2284 expression=self._parse_function() or self._parse_id_var(any_token=False), 2285 ) 2286 2287 if view: 2288 table = self._parse_id_var(any_token=False) 2289 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2290 table_alias: t.Optional[exp.TableAlias] = self.expression( 2291 exp.TableAlias, this=table, columns=columns 2292 ) 2293 elif isinstance(this, exp.Subquery) and this.alias: 2294 # Ensures parity between the Subquery's and the Lateral's "alias" args 2295 table_alias = this.args["alias"].copy() 2296 else: 2297 table_alias = self._parse_table_alias() 2298 2299 return self.expression(exp.Lateral, 
this=this, view=view, outer=outer, alias=table_alias) 2300 2301 def _parse_join_parts( 2302 self, 2303 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2304 return ( 2305 self._match_set(self.JOIN_METHODS) and self._prev, 2306 self._match_set(self.JOIN_SIDES) and self._prev, 2307 self._match_set(self.JOIN_KINDS) and self._prev, 2308 ) 2309 2310 def _parse_join( 2311 self, skip_join_token: bool = False, parse_bracket: bool = False 2312 ) -> t.Optional[exp.Join]: 2313 if self._match(TokenType.COMMA): 2314 return self.expression(exp.Join, this=self._parse_table()) 2315 2316 index = self._index 2317 method, side, kind = self._parse_join_parts() 2318 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2319 join = self._match(TokenType.JOIN) 2320 2321 if not skip_join_token and not join: 2322 self._retreat(index) 2323 kind = None 2324 method = None 2325 side = None 2326 2327 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2328 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2329 2330 if not skip_join_token and not join and not outer_apply and not cross_apply: 2331 return None 2332 2333 if outer_apply: 2334 side = Token(TokenType.LEFT, "LEFT") 2335 2336 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2337 2338 if method: 2339 kwargs["method"] = method.text 2340 if side: 2341 kwargs["side"] = side.text 2342 if kind: 2343 kwargs["kind"] = kind.text 2344 if hint: 2345 kwargs["hint"] = hint 2346 2347 if self._match(TokenType.ON): 2348 kwargs["on"] = self._parse_conjunction() 2349 elif self._match(TokenType.USING): 2350 kwargs["using"] = self._parse_wrapped_id_vars() 2351 elif not (kind and kind.token_type == TokenType.CROSS): 2352 index = self._index 2353 joins = self._parse_joins() 2354 2355 if joins and self._match(TokenType.ON): 2356 kwargs["on"] = self._parse_conjunction() 2357 elif joins and self._match(TokenType.USING): 2358 kwargs["using"] = self._parse_wrapped_id_vars() 2359 else: 2360 joins = None 2361 self._retreat(index) 2362 2363 kwargs["this"].set("joins", joins) 2364 2365 comments = [c for token in (method, side, kind) if token for c in token.comments] 2366 return self.expression(exp.Join, comments=comments, **kwargs) 2367 2368 def _parse_index( 2369 self, 2370 index: t.Optional[exp.Expression] = None, 2371 ) -> t.Optional[exp.Index]: 2372 if index: 2373 unique = None 2374 primary = None 2375 amp = None 2376 2377 self._match(TokenType.ON) 2378 self._match(TokenType.TABLE) # hive 2379 table = self._parse_table_parts(schema=True) 2380 else: 2381 unique = self._match(TokenType.UNIQUE) 2382 primary = self._match_text_seq("PRIMARY") 2383 amp = self._match_text_seq("AMP") 2384 2385 if not self._match(TokenType.INDEX): 2386 return None 2387 2388 index = self._parse_id_var() 2389 table = None 2390 2391 using = self._parse_field() if self._match(TokenType.USING) else None 2392 2393 if self._match(TokenType.L_PAREN, advance=False): 2394 columns = self._parse_wrapped_csv(self._parse_ordered) 2395 else: 2396 columns = None 2397 2398 return self.expression( 2399 exp.Index, 2400 this=index, 2401 table=table, 2402 using=using, 2403 columns=columns, 2404 unique=unique, 2405 primary=primary, 2406 amp=amp, 2407 partition_by=self._parse_partition_by(), 2408 ) 2409 2410 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2411 hints: t.List[exp.Expression] = [] 2412 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2413 # 
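# ----------------------------------------------------------------------
# Illustrative example, not part of the module: the method/side/kind
# tokens collected by _parse_join_parts and the ON/USING branches of
# _parse_join. A minimal sketch assuming sqlglot is installed.
import sqlglot

join = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b USING (id)").args["joins"][0]
print(join.args.get("side"))   # 'LEFT'
print(join.args.get("using"))  # the wrapped id list [id]
# ----------------------------------------------------------------------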
https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2414 hints.append( 2415 self.expression( 2416 exp.WithTableHint, 2417 expressions=self._parse_csv( 2418 lambda: self._parse_function() or self._parse_var(any_token=True) 2419 ), 2420 ) 2421 ) 2422 self._match_r_paren() 2423 else: 2424 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2425 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2426 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2427 2428 self._match_texts({"INDEX", "KEY"}) 2429 if self._match(TokenType.FOR): 2430 hint.set("target", self._advance_any() and self._prev.text.upper()) 2431 2432 hint.set("expressions", self._parse_wrapped_id_vars()) 2433 hints.append(hint) 2434 2435 return hints or None 2436 2437 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2438 return ( 2439 (not schema and self._parse_function(optional_parens=False)) 2440 or self._parse_id_var(any_token=False) 2441 or self._parse_string_as_identifier() 2442 or self._parse_placeholder() 2443 ) 2444 2445 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2446 catalog = None 2447 db = None 2448 table = self._parse_table_part(schema=schema) 2449 2450 while self._match(TokenType.DOT): 2451 if catalog: 2452 # This allows nesting the table in arbitrarily many dot expressions if needed 2453 table = self.expression( 2454 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2455 ) 2456 else: 2457 catalog = db 2458 db = table 2459 table = self._parse_table_part(schema=schema) 2460 2461 if not table: 2462 self.raise_error(f"Expected table name but got {self._curr}") 2463 2464 return self.expression( 2465 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2466 ) 2467 2468 def _parse_table( 2469 self, 2470 schema: bool = False, 2471 joins: bool = False, 2472 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2473 parse_bracket: bool = False, 2474 ) -> t.Optional[exp.Expression]: 2475 lateral = self._parse_lateral() 2476 if lateral: 2477 return lateral 2478 2479 unnest = self._parse_unnest() 2480 if unnest: 2481 return unnest 2482 2483 values = self._parse_derived_table_values() 2484 if values: 2485 return values 2486 2487 subquery = self._parse_select(table=True) 2488 if subquery: 2489 if not subquery.args.get("pivots"): 2490 subquery.set("pivots", self._parse_pivots()) 2491 return subquery 2492 2493 bracket = parse_bracket and self._parse_bracket(None) 2494 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2495 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2496 2497 if schema: 2498 return self._parse_schema(this=this) 2499 2500 if self.ALIAS_POST_TABLESAMPLE: 2501 table_sample = self._parse_table_sample() 2502 2503 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2504 if alias: 2505 this.set("alias", alias) 2506 2507 if not this.args.get("pivots"): 2508 this.set("pivots", self._parse_pivots()) 2509 2510 this.set("hints", self._parse_table_hints()) 2511 2512 if not self.ALIAS_POST_TABLESAMPLE: 2513 table_sample = self._parse_table_sample() 2514 2515 if table_sample: 2516 table_sample.set("this", this) 2517 this = table_sample 2518 2519 if joins: 2520 for join in iter(self._parse_join, None): 2521 this.append("joins", join) 2522 2523 return this 2524 2525 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2526 if not self._match(TokenType.UNNEST): 2527 
return None 2528 2529 expressions = self._parse_wrapped_csv(self._parse_type) 2530 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2531 2532 alias = self._parse_table_alias() if with_alias else None 2533 2534 if alias and self.UNNEST_COLUMN_ONLY: 2535 if alias.args.get("columns"): 2536 self.raise_error("Unexpected extra column alias in unnest.") 2537 2538 alias.set("columns", [alias.this]) 2539 alias.set("this", None) 2540 2541 offset = None 2542 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2543 self._match(TokenType.ALIAS) 2544 offset = self._parse_id_var() or exp.to_identifier("offset") 2545 2546 return self.expression( 2547 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2548 ) 2549 2550 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2551 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2552 if not is_derived and not self._match(TokenType.VALUES): 2553 return None 2554 2555 expressions = self._parse_csv(self._parse_value) 2556 alias = self._parse_table_alias() 2557 2558 if is_derived: 2559 self._match_r_paren() 2560 2561 return self.expression( 2562 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2563 ) 2564 2565 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2566 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2567 as_modifier and self._match_text_seq("USING", "SAMPLE") 2568 ): 2569 return None 2570 2571 bucket_numerator = None 2572 bucket_denominator = None 2573 bucket_field = None 2574 percent = None 2575 rows = None 2576 size = None 2577 seed = None 2578 2579 kind = ( 2580 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2581 ) 2582 method = self._parse_var(tokens=(TokenType.ROW,)) 2583 2584 self._match(TokenType.L_PAREN) 2585 2586 num = self._parse_number() 2587 2588 if self._match_text_seq("BUCKET"): 2589 bucket_numerator = self._parse_number() 2590 self._match_text_seq("OUT", "OF") 2591 bucket_denominator = self._parse_number() 2592 self._match(TokenType.ON) 2593 bucket_field = self._parse_field() 2594 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2595 percent = num 2596 elif self._match(TokenType.ROWS): 2597 rows = num 2598 else: 2599 size = num 2600 2601 self._match(TokenType.R_PAREN) 2602 2603 if self._match(TokenType.L_PAREN): 2604 method = self._parse_var() 2605 seed = self._match(TokenType.COMMA) and self._parse_number() 2606 self._match_r_paren() 2607 elif self._match_texts(("SEED", "REPEATABLE")): 2608 seed = self._parse_wrapped(self._parse_number) 2609 2610 return self.expression( 2611 exp.TableSample, 2612 method=method, 2613 bucket_numerator=bucket_numerator, 2614 bucket_denominator=bucket_denominator, 2615 bucket_field=bucket_field, 2616 percent=percent, 2617 rows=rows, 2618 size=size, 2619 seed=seed, 2620 kind=kind, 2621 ) 2622 2623 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2624 return list(iter(self._parse_pivot, None)) or None 2625 2626 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2627 return list(iter(self._parse_join, None)) or None 2628 2629 # https://duckdb.org/docs/sql/statements/pivot 2630 def _parse_simplified_pivot(self) -> exp.Pivot: 2631 def _parse_on() -> t.Optional[exp.Expression]: 2632 this = self._parse_bitwise() 2633 return self._parse_in(this) if self._match(TokenType.IN) else this 2634 2635 this = self._parse_table() 2636 expressions = self._match(TokenType.ON) and
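# ----------------------------------------------------------------------
# Illustrative example, not part of the module: a derived VALUES table
# parsed by _parse_derived_table_values, with its alias recovered after
# the closing paren. A minimal sketch assuming sqlglot is installed.
import sqlglot

q = sqlglot.parse_one("SELECT * FROM (VALUES (1), (2)) AS t(x)")
print(q.args["from"].this)  # exp.Values carrying TableAlias t(x)
# ----------------------------------------------------------------------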
self._parse_csv(_parse_on) 2637 using = self._match(TokenType.USING) and self._parse_csv( 2638 lambda: self._parse_alias(self._parse_function()) 2639 ) 2640 group = self._parse_group() 2641 return self.expression( 2642 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2643 ) 2644 2645 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2646 index = self._index 2647 include_nulls = None 2648 2649 if self._match(TokenType.PIVOT): 2650 unpivot = False 2651 elif self._match(TokenType.UNPIVOT): 2652 unpivot = True 2653 2654 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2655 if self._match_text_seq("INCLUDE", "NULLS"): 2656 include_nulls = True 2657 elif self._match_text_seq("EXCLUDE", "NULLS"): 2658 include_nulls = False 2659 else: 2660 return None 2661 2662 expressions = [] 2663 field = None 2664 2665 if not self._match(TokenType.L_PAREN): 2666 self._retreat(index) 2667 return None 2668 2669 if unpivot: 2670 expressions = self._parse_csv(self._parse_column) 2671 else: 2672 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2673 2674 if not expressions: 2675 self.raise_error("Failed to parse PIVOT's aggregation list") 2676 2677 if not self._match(TokenType.FOR): 2678 self.raise_error("Expecting FOR") 2679 2680 value = self._parse_column() 2681 2682 if not self._match(TokenType.IN): 2683 self.raise_error("Expecting IN") 2684 2685 field = self._parse_in(value, alias=True) 2686 2687 self._match_r_paren() 2688 2689 pivot = self.expression( 2690 exp.Pivot, 2691 expressions=expressions, 2692 field=field, 2693 unpivot=unpivot, 2694 include_nulls=include_nulls, 2695 ) 2696 2697 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2698 pivot.set("alias", self._parse_table_alias()) 2699 2700 if not unpivot: 2701 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2702 2703 columns: t.List[exp.Expression] = [] 2704 for fld in pivot.args["field"].expressions: 2705 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2706 for name in names: 2707 if self.PREFIXED_PIVOT_COLUMNS: 2708 name = f"{name}_{field_name}" if name else field_name 2709 else: 2710 name = f"{field_name}_{name}" if name else field_name 2711 2712 columns.append(exp.to_identifier(name)) 2713 2714 pivot.set("columns", columns) 2715 2716 return pivot 2717 2718 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2719 return [agg.alias for agg in aggregations] 2720 2721 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2722 if not skip_where_token and not self._match(TokenType.WHERE): 2723 return None 2724 2725 return self.expression( 2726 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2727 ) 2728 2729 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2730 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2731 return None 2732 2733 elements = defaultdict(list) 2734 2735 if self._match(TokenType.ALL): 2736 return self.expression(exp.Group, all=True) 2737 2738 while True: 2739 expressions = self._parse_csv(self._parse_conjunction) 2740 if expressions: 2741 elements["expressions"].extend(expressions) 2742 2743 grouping_sets = self._parse_grouping_sets() 2744 if grouping_sets: 2745 elements["grouping_sets"].extend(grouping_sets) 2746 2747 rollup = None 2748 cube = None 2749 totals = None 2750 2751 with_ = self._match(TokenType.WITH) 2752 
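# ----------------------------------------------------------------------
# Illustrative example, not part of the module: a PIVOT clause parsed by
# _parse_pivot and attached to its table's "pivots" arg. A minimal
# sketch assuming sqlglot is installed and the Snowflake dialect.
import sqlglot

q = sqlglot.parse_one(
    "SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a', 'b'))", read="snowflake"
)
print(q.args["from"].this.args["pivots"])  # [exp.Pivot(...)]
# ----------------------------------------------------------------------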
if self._match(TokenType.ROLLUP): 2753 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2754 elements["rollup"].extend(ensure_list(rollup)) 2755 2756 if self._match(TokenType.CUBE): 2757 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2758 elements["cube"].extend(ensure_list(cube)) 2759 2760 if self._match_text_seq("TOTALS"): 2761 totals = True 2762 elements["totals"] = True # type: ignore 2763 2764 if not (grouping_sets or rollup or cube or totals): 2765 break 2766 2767 return self.expression(exp.Group, **elements) # type: ignore 2768 2769 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2770 if not self._match(TokenType.GROUPING_SETS): 2771 return None 2772 2773 return self._parse_wrapped_csv(self._parse_grouping_set) 2774 2775 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2776 if self._match(TokenType.L_PAREN): 2777 grouping_set = self._parse_csv(self._parse_column) 2778 self._match_r_paren() 2779 return self.expression(exp.Tuple, expressions=grouping_set) 2780 2781 return self._parse_column() 2782 2783 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2784 if not skip_having_token and not self._match(TokenType.HAVING): 2785 return None 2786 return self.expression(exp.Having, this=self._parse_conjunction()) 2787 2788 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2789 if not self._match(TokenType.QUALIFY): 2790 return None 2791 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2792 2793 def _parse_order( 2794 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2795 ) -> t.Optional[exp.Expression]: 2796 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2797 return this 2798 2799 return self.expression( 2800 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2801 ) 2802 2803 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2804 if not self._match(token): 2805 return None 2806 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2807 2808 def _parse_ordered(self) -> exp.Ordered: 2809 this = self._parse_conjunction() 2810 self._match(TokenType.ASC) 2811 2812 is_desc = self._match(TokenType.DESC) 2813 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2814 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2815 desc = is_desc or False 2816 asc = not desc 2817 nulls_first = is_nulls_first or False 2818 explicitly_null_ordered = is_nulls_first or is_nulls_last 2819 2820 if ( 2821 not explicitly_null_ordered 2822 and ( 2823 (asc and self.NULL_ORDERING == "nulls_are_small") 2824 or (desc and self.NULL_ORDERING != "nulls_are_small") 2825 ) 2826 and self.NULL_ORDERING != "nulls_are_last" 2827 ): 2828 nulls_first = True 2829 2830 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2831 2832 def _parse_limit( 2833 self, this: t.Optional[exp.Expression] = None, top: bool = False 2834 ) -> t.Optional[exp.Expression]: 2835 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2836 comments = self._prev_comments 2837 if top: 2838 limit_paren = self._match(TokenType.L_PAREN) 2839 expression = self._parse_number() 2840 2841 if limit_paren: 2842 self._match_r_paren() 2843 else: 2844 expression = self._parse_term() 2845 2846 if self._match(TokenType.COMMA): 2847 offset = expression 2848 expression = self._parse_term() 2849 else: 2850 offset = None 2851 2852 limit_exp = self.expression( 2853 exp.Limit, this=this, 
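# ----------------------------------------------------------------------
# Illustrative example, not part of the module: MySQL's WITH ROLLUP is
# recorded by _parse_group as a bare flag in the "rollup" element, per
# the with_ branch above. A minimal sketch assuming sqlglot is installed.
import sqlglot

q = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY a WITH ROLLUP", read="mysql")
print(q.args["group"].args["rollup"])  # [True]
# ----------------------------------------------------------------------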
expression=expression, offset=offset, comments=comments 2854 ) 2855 2856 return limit_exp 2857 2858 if self._match(TokenType.FETCH): 2859 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2860 direction = self._prev.text if direction else "FIRST" 2861 2862 count = self._parse_number() 2863 percent = self._match(TokenType.PERCENT) 2864 2865 self._match_set((TokenType.ROW, TokenType.ROWS)) 2866 2867 only = self._match_text_seq("ONLY") 2868 with_ties = self._match_text_seq("WITH", "TIES") 2869 2870 if only and with_ties: 2871 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2872 2873 return self.expression( 2874 exp.Fetch, 2875 direction=direction, 2876 count=count, 2877 percent=percent, 2878 with_ties=with_ties, 2879 ) 2880 2881 return this 2882 2883 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2884 if not self._match(TokenType.OFFSET): 2885 return this 2886 2887 count = self._parse_term() 2888 self._match_set((TokenType.ROW, TokenType.ROWS)) 2889 return self.expression(exp.Offset, this=this, expression=count) 2890 2891 def _parse_locks(self) -> t.List[exp.Lock]: 2892 locks = [] 2893 while True: 2894 if self._match_text_seq("FOR", "UPDATE"): 2895 update = True 2896 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2897 "LOCK", "IN", "SHARE", "MODE" 2898 ): 2899 update = False 2900 else: 2901 break 2902 2903 expressions = None 2904 if self._match_text_seq("OF"): 2905 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2906 2907 wait: t.Optional[bool | exp.Expression] = None 2908 if self._match_text_seq("NOWAIT"): 2909 wait = True 2910 elif self._match_text_seq("WAIT"): 2911 wait = self._parse_primary() 2912 elif self._match_text_seq("SKIP", "LOCKED"): 2913 wait = False 2914 2915 locks.append( 2916 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2917 ) 2918 2919 return locks 2920 2921 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2922 if not self._match_set(self.SET_OPERATIONS): 2923 return this 2924 2925 token_type = self._prev.token_type 2926 2927 if token_type == TokenType.UNION: 2928 expression = exp.Union 2929 elif token_type == TokenType.EXCEPT: 2930 expression = exp.Except 2931 else: 2932 expression = exp.Intersect 2933 2934 return self.expression( 2935 expression, 2936 this=this, 2937 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2938 expression=self._parse_set_operations(self._parse_select(nested=True)), 2939 ) 2940 2941 def _parse_expression(self) -> t.Optional[exp.Expression]: 2942 return self._parse_alias(self._parse_conjunction()) 2943 2944 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2945 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2946 2947 def _parse_equality(self) -> t.Optional[exp.Expression]: 2948 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2949 2950 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2951 return self._parse_tokens(self._parse_range, self.COMPARISON) 2952 2953 def _parse_range(self) -> t.Optional[exp.Expression]: 2954 this = self._parse_bitwise() 2955 negate = self._match(TokenType.NOT) 2956 2957 if self._match_set(self.RANGE_PARSERS): 2958 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2959 if not expression: 2960 return this 2961 2962 this = expression 2963 elif self._match(TokenType.ISNULL): 2964 this = self.expression(exp.Is, this=this, 
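# ----------------------------------------------------------------------
# Illustrative example, not part of the module: _parse_set_operations
# marks a bare UNION as distinct unless ALL follows. A minimal sketch
# assuming sqlglot is installed.
import sqlglot

union = sqlglot.parse_one("SELECT a FROM t UNION SELECT a FROM u")
print(type(union).__name__, union.args["distinct"])  # Union True
# ----------------------------------------------------------------------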
expression=exp.Null()) 2965 2966 # Postgres supports ISNULL and NOTNULL for conditions. 2967 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2968 if self._match(TokenType.NOTNULL): 2969 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2970 this = self.expression(exp.Not, this=this) 2971 2972 if negate: 2973 this = self.expression(exp.Not, this=this) 2974 2975 if self._match(TokenType.IS): 2976 this = self._parse_is(this) 2977 2978 return this 2979 2980 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2981 index = self._index - 1 2982 negate = self._match(TokenType.NOT) 2983 2984 if self._match_text_seq("DISTINCT", "FROM"): 2985 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2986 return self.expression(klass, this=this, expression=self._parse_expression()) 2987 2988 expression = self._parse_null() or self._parse_boolean() 2989 if not expression: 2990 self._retreat(index) 2991 return None 2992 2993 this = self.expression(exp.Is, this=this, expression=expression) 2994 return self.expression(exp.Not, this=this) if negate else this 2995 2996 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2997 unnest = self._parse_unnest(with_alias=False) 2998 if unnest: 2999 this = self.expression(exp.In, this=this, unnest=unnest) 3000 elif self._match(TokenType.L_PAREN): 3001 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3002 3003 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3004 this = self.expression(exp.In, this=this, query=expressions[0]) 3005 else: 3006 this = self.expression(exp.In, this=this, expressions=expressions) 3007 3008 self._match_r_paren(this) 3009 else: 3010 this = self.expression(exp.In, this=this, field=self._parse_field()) 3011 3012 return this 3013 3014 def _parse_between(self, this: exp.Expression) -> exp.Between: 3015 low = self._parse_bitwise() 3016 self._match(TokenType.AND) 3017 high = self._parse_bitwise() 3018 return self.expression(exp.Between, this=this, low=low, high=high) 3019 3020 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3021 if not self._match(TokenType.ESCAPE): 3022 return this 3023 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3024 3025 def _parse_interval(self) -> t.Optional[exp.Interval]: 3026 if not self._match(TokenType.INTERVAL): 3027 return None 3028 3029 if self._match(TokenType.STRING, advance=False): 3030 this = self._parse_primary() 3031 else: 3032 this = self._parse_term() 3033 3034 unit = self._parse_function() or self._parse_var() 3035 3036 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3037 # each INTERVAL expression into this canonical form so it's easy to transpile 3038 if this and this.is_number: 3039 this = exp.Literal.string(this.name) 3040 elif this and this.is_string: 3041 parts = this.name.split() 3042 3043 if len(parts) == 2: 3044 if unit: 3045 # this is not actually a unit, it's something else 3046 unit = None 3047 self._retreat(self._index - 1) 3048 else: 3049 this = exp.Literal.string(parts[0]) 3050 unit = self.expression(exp.Var, this=parts[1]) 3051 3052 return self.expression(exp.Interval, this=this, unit=unit) 3053 3054 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3055 this = self._parse_term() 3056 3057 while True: 3058 if self._match_set(self.BITWISE): 3059 this = self.expression( 3060 self.BITWISE[self._prev.token_type], 3061 this=this, 3062 
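# ----------------------------------------------------------------------
# Illustrative example, not part of the module: _parse_interval splits
# INTERVAL '5 day' into the canonical number-plus-unit form described in
# the comment above. A minimal sketch assuming sqlglot is installed.
import sqlglot

iv = sqlglot.parse_one("SELECT INTERVAL '5 day'").expressions[0]
print(iv.this)          # the string literal '5'
print(iv.args["unit"])  # exp.Var day
# ----------------------------------------------------------------------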
expression=self._parse_term(), 3063 ) 3064 elif self._match(TokenType.DQMARK): 3065 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3066 elif self._match_pair(TokenType.LT, TokenType.LT): 3067 this = self.expression( 3068 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3069 ) 3070 elif self._match_pair(TokenType.GT, TokenType.GT): 3071 this = self.expression( 3072 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3073 ) 3074 else: 3075 break 3076 3077 return this 3078 3079 def _parse_term(self) -> t.Optional[exp.Expression]: 3080 return self._parse_tokens(self._parse_factor, self.TERM) 3081 3082 def _parse_factor(self) -> t.Optional[exp.Expression]: 3083 return self._parse_tokens(self._parse_unary, self.FACTOR) 3084 3085 def _parse_unary(self) -> t.Optional[exp.Expression]: 3086 if self._match_set(self.UNARY_PARSERS): 3087 return self.UNARY_PARSERS[self._prev.token_type](self) 3088 return self._parse_at_time_zone(self._parse_type()) 3089 3090 def _parse_type(self) -> t.Optional[exp.Expression]: 3091 interval = self._parse_interval() 3092 if interval: 3093 return interval 3094 3095 index = self._index 3096 data_type = self._parse_types(check_func=True) 3097 this = self._parse_column() 3098 3099 if data_type: 3100 if isinstance(this, exp.Literal): 3101 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3102 if parser: 3103 return parser(self, this, data_type) 3104 return self.expression(exp.Cast, this=this, to=data_type) 3105 if not data_type.expressions: 3106 self._retreat(index) 3107 return self._parse_column() 3108 return self._parse_column_ops(data_type) 3109 3110 return this 3111 3112 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3113 this = self._parse_type() 3114 if not this: 3115 return None 3116 3117 return self.expression( 3118 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3119 ) 3120 3121 def _parse_types( 3122 self, check_func: bool = False, schema: bool = False 3123 ) -> t.Optional[exp.Expression]: 3124 index = self._index 3125 3126 prefix = self._match_text_seq("SYSUDTLIB", ".") 3127 3128 if not self._match_set(self.TYPE_TOKENS): 3129 return None 3130 3131 type_token = self._prev.token_type 3132 3133 if type_token == TokenType.PSEUDO_TYPE: 3134 return self.expression(exp.PseudoType, this=self._prev.text) 3135 3136 nested = type_token in self.NESTED_TYPE_TOKENS 3137 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3138 expressions = None 3139 maybe_func = False 3140 3141 if self._match(TokenType.L_PAREN): 3142 if is_struct: 3143 expressions = self._parse_csv(self._parse_struct_types) 3144 elif nested: 3145 expressions = self._parse_csv( 3146 lambda: self._parse_types(check_func=check_func, schema=schema) 3147 ) 3148 elif type_token in self.ENUM_TYPE_TOKENS: 3149 expressions = self._parse_csv(self._parse_equality) 3150 else: 3151 expressions = self._parse_csv(self._parse_type_size) 3152 3153 if not expressions or not self._match(TokenType.R_PAREN): 3154 self._retreat(index) 3155 return None 3156 3157 maybe_func = True 3158 3159 this: t.Optional[exp.Expression] = None 3160 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3161 3162 if nested and self._match(TokenType.LT): 3163 if is_struct: 3164 expressions = self._parse_csv(self._parse_struct_types) 3165 else: 3166 expressions = self._parse_csv( 3167 lambda: self._parse_types(check_func=check_func, schema=schema) 3168 ) 3169 3170 if not self._match(TokenType.GT): 3171 self.raise_error("Expecting >") 3172 3173 if 
self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3174 values = self._parse_csv(self._parse_conjunction) 3175 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3176 3177 if type_token in self.TIMESTAMPS: 3178 if self._match_text_seq("WITH", "TIME", "ZONE"): 3179 maybe_func = False 3180 tz_type = ( 3181 exp.DataType.Type.TIMETZ 3182 if type_token in self.TIMES 3183 else exp.DataType.Type.TIMESTAMPTZ 3184 ) 3185 this = exp.DataType(this=tz_type, expressions=expressions) 3186 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3187 maybe_func = False 3188 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3189 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3190 maybe_func = False 3191 elif type_token == TokenType.INTERVAL: 3192 if self._match_text_seq("YEAR", "TO", "MONTH"): 3193 span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()] 3194 elif self._match_text_seq("DAY", "TO", "SECOND"): 3195 span = [exp.IntervalDayToSecondSpan()] 3196 else: 3197 span = None 3198 3199 unit = not span and self._parse_var() 3200 if not unit: 3201 this = self.expression( 3202 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3203 ) 3204 else: 3205 this = self.expression(exp.Interval, unit=unit) 3206 3207 if maybe_func and check_func: 3208 index2 = self._index 3209 peek = self._parse_string() 3210 3211 if not peek: 3212 self._retreat(index) 3213 return None 3214 3215 self._retreat(index2) 3216 3217 if not this: 3218 this = exp.DataType( 3219 this=exp.DataType.Type[type_token.value], 3220 expressions=expressions, 3221 nested=nested, 3222 values=values, 3223 prefix=prefix, 3224 ) 3225 3226 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3227 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3228 3229 return this 3230 3231 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3232 this = self._parse_type() or self._parse_id_var() 3233 self._match(TokenType.COLON) 3234 return self._parse_column_def(this) 3235 3236 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3237 if not self._match_text_seq("AT", "TIME", "ZONE"): 3238 return this 3239 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3240 3241 def _parse_column(self) -> t.Optional[exp.Expression]: 3242 this = self._parse_field() 3243 if isinstance(this, exp.Identifier): 3244 this = self.expression(exp.Column, this=this) 3245 elif not this: 3246 return self._parse_bracket(this) 3247 return self._parse_column_ops(this) 3248 3249 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3250 this = self._parse_bracket(this) 3251 3252 while self._match_set(self.COLUMN_OPERATORS): 3253 op_token = self._prev.token_type 3254 op = self.COLUMN_OPERATORS.get(op_token) 3255 3256 if op_token == TokenType.DCOLON: 3257 field = self._parse_types() 3258 if not field: 3259 self.raise_error("Expected type") 3260 elif op and self._curr: 3261 self._advance() 3262 value = self._prev.text 3263 field = ( 3264 exp.Literal.number(value) 3265 if self._prev.token_type == TokenType.NUMBER 3266 else exp.Literal.string(value) 3267 ) 3268 else: 3269 field = self._parse_field(anonymous_func=True, any_token=True) 3270 3271 if isinstance(field, exp.Func): 3272 # bigquery allows function calls like x.y.count(...) 3273 # SAFE.SUBSTR(...) 
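# ----------------------------------------------------------------------
# Illustrative example, not part of the module: _parse_types builds the
# target type of a cast, including parameterized types like
# DECIMAL(10, 2). A minimal sketch assuming sqlglot is installed.
import sqlglot

cast = sqlglot.parse_one("SELECT CAST(x AS DECIMAL(10, 2))").expressions[0]
print(cast.to)  # the exp.DataType node, rendered as DECIMAL(10, 2)
# ----------------------------------------------------------------------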
3274 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3275 this = self._replace_columns_with_dots(this) 3276 3277 if op: 3278 this = op(self, this, field) 3279 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3280 this = self.expression( 3281 exp.Column, 3282 this=field, 3283 table=this.this, 3284 db=this.args.get("table"), 3285 catalog=this.args.get("db"), 3286 ) 3287 else: 3288 this = self.expression(exp.Dot, this=this, expression=field) 3289 this = self._parse_bracket(this) 3290 return this 3291 3292 def _parse_primary(self) -> t.Optional[exp.Expression]: 3293 if self._match_set(self.PRIMARY_PARSERS): 3294 token_type = self._prev.token_type 3295 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3296 3297 if token_type == TokenType.STRING: 3298 expressions = [primary] 3299 while self._match(TokenType.STRING): 3300 expressions.append(exp.Literal.string(self._prev.text)) 3301 3302 if len(expressions) > 1: 3303 return self.expression(exp.Concat, expressions=expressions) 3304 3305 return primary 3306 3307 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3308 return exp.Literal.number(f"0.{self._prev.text}") 3309 3310 if self._match(TokenType.L_PAREN): 3311 comments = self._prev_comments 3312 query = self._parse_select() 3313 3314 if query: 3315 expressions = [query] 3316 else: 3317 expressions = self._parse_expressions() 3318 3319 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3320 3321 if isinstance(this, exp.Subqueryable): 3322 this = self._parse_set_operations( 3323 self._parse_subquery(this=this, parse_alias=False) 3324 ) 3325 elif len(expressions) > 1: 3326 this = self.expression(exp.Tuple, expressions=expressions) 3327 else: 3328 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3329 3330 if this: 3331 this.add_comments(comments) 3332 3333 self._match_r_paren(expression=this) 3334 return this 3335 3336 return None 3337 3338 def _parse_field( 3339 self, 3340 any_token: bool = False, 3341 tokens: t.Optional[t.Collection[TokenType]] = None, 3342 anonymous_func: bool = False, 3343 ) -> t.Optional[exp.Expression]: 3344 return ( 3345 self._parse_primary() 3346 or self._parse_function(anonymous=anonymous_func) 3347 or self._parse_id_var(any_token=any_token, tokens=tokens) 3348 ) 3349 3350 def _parse_function( 3351 self, 3352 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3353 anonymous: bool = False, 3354 optional_parens: bool = True, 3355 ) -> t.Optional[exp.Expression]: 3356 if not self._curr: 3357 return None 3358 3359 token_type = self._curr.token_type 3360 this = self._curr.text 3361 upper = this.upper() 3362 3363 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3364 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3365 self._advance() 3366 return parser(self) 3367 3368 if not self._next or self._next.token_type != TokenType.L_PAREN: 3369 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3370 self._advance() 3371 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3372 3373 return None 3374 3375 if token_type not in self.FUNC_TOKENS: 3376 return None 3377 3378 self._advance(2) 3379 3380 parser = self.FUNCTION_PARSERS.get(upper) 3381 if parser and not anonymous: 3382 this = parser(self) 3383 else: 3384 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3385 3386 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3387 this = 
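# ----------------------------------------------------------------------
# Illustrative example, not part of the module: _parse_primary folds
# adjacent string literals into a single exp.Concat. A minimal sketch
# assuming sqlglot is installed.
import sqlglot

q = sqlglot.parse_one("SELECT 'foo' 'bar'")
print(q.expressions[0])  # renders as CONCAT('foo', 'bar')
# ----------------------------------------------------------------------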
self.expression(subquery_predicate, this=self._parse_select()) 3388 self._match_r_paren() 3389 return this 3390 3391 if functions is None: 3392 functions = self.FUNCTIONS 3393 3394 function = functions.get(upper) 3395 3396 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3397 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3398 3399 if function and not anonymous: 3400 func = self.validate_expression(function(args), args) 3401 if not self.NORMALIZE_FUNCTIONS: 3402 func.meta["name"] = this 3403 this = func 3404 else: 3405 this = self.expression(exp.Anonymous, this=this, expressions=args) 3406 3407 self._match_r_paren(this) 3408 return self._parse_window(this) 3409 3410 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3411 return self._parse_column_def(self._parse_id_var()) 3412 3413 def _parse_user_defined_function( 3414 self, kind: t.Optional[TokenType] = None 3415 ) -> t.Optional[exp.Expression]: 3416 this = self._parse_id_var() 3417 3418 while self._match(TokenType.DOT): 3419 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3420 3421 if not self._match(TokenType.L_PAREN): 3422 return this 3423 3424 expressions = self._parse_csv(self._parse_function_parameter) 3425 self._match_r_paren() 3426 return self.expression( 3427 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3428 ) 3429 3430 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3431 literal = self._parse_primary() 3432 if literal: 3433 return self.expression(exp.Introducer, this=token.text, expression=literal) 3434 3435 return self.expression(exp.Identifier, this=token.text) 3436 3437 def _parse_session_parameter(self) -> exp.SessionParameter: 3438 kind = None 3439 this = self._parse_id_var() or self._parse_primary() 3440 3441 if this and self._match(TokenType.DOT): 3442 kind = this.name 3443 this = self._parse_var() or self._parse_primary() 3444 3445 return self.expression(exp.SessionParameter, this=this, kind=kind) 3446 3447 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3448 index = self._index 3449 3450 if self._match(TokenType.L_PAREN): 3451 expressions = self._parse_csv(self._parse_id_var) 3452 3453 if not self._match(TokenType.R_PAREN): 3454 self._retreat(index) 3455 else: 3456 expressions = [self._parse_id_var()] 3457 3458 if self._match_set(self.LAMBDAS): 3459 return self.LAMBDAS[self._prev.token_type](self, expressions) 3460 3461 self._retreat(index) 3462 3463 this: t.Optional[exp.Expression] 3464 3465 if self._match(TokenType.DISTINCT): 3466 this = self.expression( 3467 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3468 ) 3469 else: 3470 this = self._parse_select_or_expression(alias=alias) 3471 3472 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3473 3474 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3475 index = self._index 3476 3477 if not self.errors: 3478 try: 3479 if self._parse_select(nested=True): 3480 return this 3481 except ParseError: 3482 pass 3483 finally: 3484 self.errors.clear() 3485 self._retreat(index) 3486 3487 if not self._match(TokenType.L_PAREN): 3488 return this 3489 3490 args = self._parse_csv( 3491 lambda: self._parse_constraint() 3492 or self._parse_column_def(self._parse_field(any_token=True)) 3493 ) 3494 3495 self._match_r_paren() 3496 return self.expression(exp.Schema, this=this, expressions=args) 3497 3498 def _parse_column_def(self, this: 
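# ----------------------------------------------------------------------
# Illustrative example, not part of the module: _parse_function falls
# back to exp.Anonymous for names it does not recognize, keeping the
# call intact. A minimal sketch assuming sqlglot is installed.
import sqlglot
from sqlglot import exp

call = sqlglot.parse_one("SELECT MY_UDF(a, 1)").expressions[0]
print(isinstance(call, exp.Anonymous), call.sql())  # True MY_UDF(a, 1)
# ----------------------------------------------------------------------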
t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3499 # column defs are not really columns, they're identifiers 3500 if isinstance(this, exp.Column): 3501 this = this.this 3502 3503 kind = self._parse_types(schema=True) 3504 3505 if self._match_text_seq("FOR", "ORDINALITY"): 3506 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3507 3508 constraints = [] 3509 while True: 3510 constraint = self._parse_column_constraint() 3511 if not constraint: 3512 break 3513 constraints.append(constraint) 3514 3515 if not kind and not constraints: 3516 return this 3517 3518 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3519 3520 def _parse_auto_increment( 3521 self, 3522 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3523 start = None 3524 increment = None 3525 3526 if self._match(TokenType.L_PAREN, advance=False): 3527 args = self._parse_wrapped_csv(self._parse_bitwise) 3528 start = seq_get(args, 0) 3529 increment = seq_get(args, 1) 3530 elif self._match_text_seq("START"): 3531 start = self._parse_bitwise() 3532 self._match_text_seq("INCREMENT") 3533 increment = self._parse_bitwise() 3534 3535 if start and increment: 3536 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3537 3538 return exp.AutoIncrementColumnConstraint() 3539 3540 def _parse_compress(self) -> exp.CompressColumnConstraint: 3541 if self._match(TokenType.L_PAREN, advance=False): 3542 return self.expression( 3543 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3544 ) 3545 3546 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3547 3548 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3549 if self._match_text_seq("BY", "DEFAULT"): 3550 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3551 this = self.expression( 3552 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3553 ) 3554 else: 3555 self._match_text_seq("ALWAYS") 3556 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3557 3558 self._match(TokenType.ALIAS) 3559 identity = self._match_text_seq("IDENTITY") 3560 3561 if self._match(TokenType.L_PAREN): 3562 if self._match_text_seq("START", "WITH"): 3563 this.set("start", self._parse_bitwise()) 3564 if self._match_text_seq("INCREMENT", "BY"): 3565 this.set("increment", self._parse_bitwise()) 3566 if self._match_text_seq("MINVALUE"): 3567 this.set("minvalue", self._parse_bitwise()) 3568 if self._match_text_seq("MAXVALUE"): 3569 this.set("maxvalue", self._parse_bitwise()) 3570 3571 if self._match_text_seq("CYCLE"): 3572 this.set("cycle", True) 3573 elif self._match_text_seq("NO", "CYCLE"): 3574 this.set("cycle", False) 3575 3576 if not identity: 3577 this.set("expression", self._parse_bitwise()) 3578 3579 self._match_r_paren() 3580 3581 return this 3582 3583 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3584 self._match_text_seq("LENGTH") 3585 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3586 3587 def _parse_not_constraint( 3588 self, 3589 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3590 if self._match_text_seq("NULL"): 3591 return self.expression(exp.NotNullColumnConstraint) 3592 if self._match_text_seq("CASESPECIFIC"): 3593 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3594 return None 3595 3596 def _parse_column_constraint(self) -> 
t.Optional[exp.Expression]: 3597 if self._match(TokenType.CONSTRAINT): 3598 this = self._parse_id_var() 3599 else: 3600 this = None 3601 3602 if self._match_texts(self.CONSTRAINT_PARSERS): 3603 return self.expression( 3604 exp.ColumnConstraint, 3605 this=this, 3606 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3607 ) 3608 3609 return this 3610 3611 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3612 if not self._match(TokenType.CONSTRAINT): 3613 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3614 3615 this = self._parse_id_var() 3616 expressions = [] 3617 3618 while True: 3619 constraint = self._parse_unnamed_constraint() or self._parse_function() 3620 if not constraint: 3621 break 3622 expressions.append(constraint) 3623 3624 return self.expression(exp.Constraint, this=this, expressions=expressions) 3625 3626 def _parse_unnamed_constraint( 3627 self, constraints: t.Optional[t.Collection[str]] = None 3628 ) -> t.Optional[exp.Expression]: 3629 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3630 return None 3631 3632 constraint = self._prev.text.upper() 3633 if constraint not in self.CONSTRAINT_PARSERS: 3634 self.raise_error(f"No parser found for schema constraint {constraint}.") 3635 3636 return self.CONSTRAINT_PARSERS[constraint](self) 3637 3638 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3639 self._match_text_seq("KEY") 3640 return self.expression( 3641 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3642 ) 3643 3644 def _parse_key_constraint_options(self) -> t.List[str]: 3645 options = [] 3646 while True: 3647 if not self._curr: 3648 break 3649 3650 if self._match(TokenType.ON): 3651 action = None 3652 on = self._advance_any() and self._prev.text 3653 3654 if self._match_text_seq("NO", "ACTION"): 3655 action = "NO ACTION" 3656 elif self._match_text_seq("CASCADE"): 3657 action = "CASCADE" 3658 elif self._match_pair(TokenType.SET, TokenType.NULL): 3659 action = "SET NULL" 3660 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3661 action = "SET DEFAULT" 3662 else: 3663 self.raise_error("Invalid key constraint") 3664 3665 options.append(f"ON {on} {action}") 3666 elif self._match_text_seq("NOT", "ENFORCED"): 3667 options.append("NOT ENFORCED") 3668 elif self._match_text_seq("DEFERRABLE"): 3669 options.append("DEFERRABLE") 3670 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3671 options.append("INITIALLY DEFERRED") 3672 elif self._match_text_seq("NORELY"): 3673 options.append("NORELY") 3674 elif self._match_text_seq("MATCH", "FULL"): 3675 options.append("MATCH FULL") 3676 else: 3677 break 3678 3679 return options 3680 3681 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3682 if match and not self._match(TokenType.REFERENCES): 3683 return None 3684 3685 expressions = None 3686 this = self._parse_table(schema=True) 3687 options = self._parse_key_constraint_options() 3688 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3689 3690 def _parse_foreign_key(self) -> exp.ForeignKey: 3691 expressions = self._parse_wrapped_id_vars() 3692 reference = self._parse_references() 3693 options = {} 3694 3695 while self._match(TokenType.ON): 3696 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3697 self.raise_error("Expected DELETE or UPDATE") 3698 3699 kind = self._prev.text.lower() 3700 3701 if self._match_text_seq("NO", "ACTION"): 3702 action = "NO ACTION" 3703 elif 
self._match(TokenType.SET): 3704 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3705 action = "SET " + self._prev.text.upper() 3706 else: 3707 self._advance() 3708 action = self._prev.text.upper() 3709 3710 options[kind] = action 3711 3712 return self.expression( 3713 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3714 ) 3715 3716 def _parse_primary_key( 3717 self, wrapped_optional: bool = False, in_props: bool = False 3718 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3719 desc = ( 3720 self._match_set((TokenType.ASC, TokenType.DESC)) 3721 and self._prev.token_type == TokenType.DESC 3722 ) 3723 3724 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3725 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3726 3727 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3728 options = self._parse_key_constraint_options() 3729 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3730 3731 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3732 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3733 return this 3734 3735 bracket_kind = self._prev.token_type 3736 3737 if self._match(TokenType.COLON): 3738 expressions: t.List[t.Optional[exp.Expression]] = [ 3739 self.expression(exp.Slice, expression=self._parse_conjunction()) 3740 ] 3741 else: 3742 expressions = self._parse_csv( 3743 lambda: self._parse_slice( 3744 self._parse_alias(self._parse_conjunction(), explicit=True) 3745 ) 3746 ) 3747 3748 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3749 if bracket_kind == TokenType.L_BRACE: 3750 this = self.expression(exp.Struct, expressions=expressions) 3751 elif not this or this.name.upper() == "ARRAY": 3752 this = self.expression(exp.Array, expressions=expressions) 3753 else: 3754 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3755 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3756 3757 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3758 self.raise_error("Expected ]") 3759 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3760 self.raise_error("Expected }") 3761 3762 self._add_comments(this) 3763 return self._parse_bracket(this) 3764 3765 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3766 if self._match(TokenType.COLON): 3767 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3768 return this 3769 3770 def _parse_case(self) -> t.Optional[exp.Expression]: 3771 ifs = [] 3772 default = None 3773 3774 comments = self._prev_comments 3775 expression = self._parse_conjunction() 3776 3777 while self._match(TokenType.WHEN): 3778 this = self._parse_conjunction() 3779 self._match(TokenType.THEN) 3780 then = self._parse_conjunction() 3781 ifs.append(self.expression(exp.If, this=this, true=then)) 3782 3783 if self._match(TokenType.ELSE): 3784 default = self._parse_conjunction() 3785 3786 if not self._match(TokenType.END): 3787 self.raise_error("Expected END after CASE", self._prev) 3788 3789 return self._parse_window( 3790 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 3791 ) 3792 3793 def _parse_if(self) -> t.Optional[exp.Expression]: 3794 if self._match(TokenType.L_PAREN): 3795 args = self._parse_csv(self._parse_conjunction) 3796 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 3797 self._match_r_paren() 3798 else: 3799 index = self._index - 1 3800 condition = self._parse_conjunction() 3801 3802 if not condition: 3803 self._retreat(index) 3804 return None 3805 3806 self._match(TokenType.THEN) 3807 true = self._parse_conjunction() 3808 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3809 self._match(TokenType.END) 3810 this = self.expression(exp.If, this=condition, true=true, false=false) 3811 3812 return self._parse_window(this) 3813 3814 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 3815 if not self._match_text_seq("VALUE", "FOR"): 3816 self._retreat(self._index - 1) 3817 return None 3818 3819 return self.expression( 3820 exp.NextValueFor, 3821 this=self._parse_column(), 3822 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 3823 ) 3824 3825 def _parse_extract(self) -> exp.Extract: 3826 this = self._parse_function() or self._parse_var() or self._parse_type() 3827 3828 if self._match(TokenType.FROM): 3829 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3830 3831 if not self._match(TokenType.COMMA): 3832 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3833 3834 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3835 3836 def _parse_any_value(self) -> exp.AnyValue: 3837 this = self._parse_lambda() 3838 is_max = None 3839 having = None 3840 3841 if self._match(TokenType.HAVING): 3842 self._match_texts(("MAX", "MIN")) 3843 is_max = self._prev.text == "MAX" 3844 having = self._parse_column() 3845 3846 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3847 3848 def _parse_cast(self, strict: bool) -> exp.Expression: 3849 this = self._parse_conjunction() 3850 3851 if not self._match(TokenType.ALIAS): 3852 if self._match(TokenType.COMMA): 3853 return self.expression( 3854 exp.CastToStrType, this=this, expression=self._parse_string() 3855 ) 3856 else: 3857 self.raise_error("Expected AS after CAST") 3858 3859 fmt = None 3860 to = self._parse_types() 3861 3862 if not to: 3863 self.raise_error("Expected TYPE after CAST") 3864 elif to.this == exp.DataType.Type.CHAR: 3865 if self._match(TokenType.CHARACTER_SET): 3866 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3867 elif self._match(TokenType.FORMAT): 3868 fmt_string = self._parse_string() 3869 fmt = self._parse_at_time_zone(fmt_string) 3870 3871 if to.this in exp.DataType.TEMPORAL_TYPES: 3872 this = self.expression( 3873 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3874 this=this, 3875 format=exp.Literal.string( 3876 format_time( 3877 fmt_string.this if fmt_string else "", 3878 self.FORMAT_MAPPING or self.TIME_MAPPING, 3879 self.FORMAT_TRIE or self.TIME_TRIE, 3880 ) 3881 ), 3882 ) 3883 3884 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3885 this.set("zone", fmt.args["zone"]) 3886 3887 return this 3888 3889 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3890 3891 def _parse_concat(self) -> t.Optional[exp.Expression]: 3892 args = self._parse_csv(self._parse_conjunction) 3893 if self.CONCAT_NULL_OUTPUTS_STRING: 3894 args = [ 3895 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3896 for arg in args 3897 if arg 3898 ] 3899 3900 # Some dialects (e.g. 
Trino) don't allow a single-argument CONCAT call, so when 3901 # we find such a call we replace it with its argument. 3902 if len(args) == 1: 3903 return args[0] 3904 3905 return self.expression( 3906 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3907 ) 3908 3909 def _parse_string_agg(self) -> exp.Expression: 3910 if self._match(TokenType.DISTINCT): 3911 args: t.List[t.Optional[exp.Expression]] = [ 3912 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3913 ] 3914 if self._match(TokenType.COMMA): 3915 args.extend(self._parse_csv(self._parse_conjunction)) 3916 else: 3917 args = self._parse_csv(self._parse_conjunction) 3918 3919 index = self._index 3920 if not self._match(TokenType.R_PAREN) and args: 3921 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3922 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 3923 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 3924 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 3925 3926 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3927 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3928 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3929 if not self._match_text_seq("WITHIN", "GROUP"): 3930 self._retreat(index) 3931 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3932 3933 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3934 order = self._parse_order(this=seq_get(args, 0)) 3935 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3936 3937 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3938 this = self._parse_bitwise() 3939 3940 if self._match(TokenType.USING): 3941 to: t.Optional[exp.Expression] = self.expression( 3942 exp.CharacterSet, this=self._parse_var() 3943 ) 3944 elif self._match(TokenType.COMMA): 3945 to = self._parse_types() 3946 else: 3947 to = None 3948 3949 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3950 3951 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3952 """ 3953 There are generally two variants of the DECODE function: 3954 3955 - DECODE(bin, charset) 3956 - DECODE(expression, search, result [, search, result] ... [, default]) 3957 3958 The second variant will always be parsed into a CASE expression. Note that NULL 3959 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3960 instead of relying on pattern matching. 
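For example, given

    DECODE(x, 1, 'one', NULL, 'missing', 'other')

the search/result pairs become WHEN branches and the trailing argument
becomes the default, as if the input had been

    CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'missing' ELSE 'other' END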
3961 """ 3962 args = self._parse_csv(self._parse_conjunction) 3963 3964 if len(args) < 3: 3965 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3966 3967 expression, *expressions = args 3968 if not expression: 3969 return None 3970 3971 ifs = [] 3972 for search, result in zip(expressions[::2], expressions[1::2]): 3973 if not search or not result: 3974 return None 3975 3976 if isinstance(search, exp.Literal): 3977 ifs.append( 3978 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3979 ) 3980 elif isinstance(search, exp.Null): 3981 ifs.append( 3982 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3983 ) 3984 else: 3985 cond = exp.or_( 3986 exp.EQ(this=expression.copy(), expression=search), 3987 exp.and_( 3988 exp.Is(this=expression.copy(), expression=exp.Null()), 3989 exp.Is(this=search.copy(), expression=exp.Null()), 3990 copy=False, 3991 ), 3992 copy=False, 3993 ) 3994 ifs.append(exp.If(this=cond, true=result)) 3995 3996 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3997 3998 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3999 self._match_text_seq("KEY") 4000 key = self._parse_field() 4001 self._match(TokenType.COLON) 4002 self._match_text_seq("VALUE") 4003 value = self._parse_field() 4004 4005 if not key and not value: 4006 return None 4007 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4008 4009 def _parse_json_object(self) -> exp.JSONObject: 4010 star = self._parse_star() 4011 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 4012 4013 null_handling = None 4014 if self._match_text_seq("NULL", "ON", "NULL"): 4015 null_handling = "NULL ON NULL" 4016 elif self._match_text_seq("ABSENT", "ON", "NULL"): 4017 null_handling = "ABSENT ON NULL" 4018 4019 unique_keys = None 4020 if self._match_text_seq("WITH", "UNIQUE"): 4021 unique_keys = True 4022 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4023 unique_keys = False 4024 4025 self._match_text_seq("KEYS") 4026 4027 return_type = self._match_text_seq("RETURNING") and self._parse_type() 4028 format_json = self._match_text_seq("FORMAT", "JSON") 4029 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4030 4031 return self.expression( 4032 exp.JSONObject, 4033 expressions=expressions, 4034 null_handling=null_handling, 4035 unique_keys=unique_keys, 4036 return_type=return_type, 4037 format_json=format_json, 4038 encoding=encoding, 4039 ) 4040 4041 def _parse_logarithm(self) -> exp.Func: 4042 # Default argument order is base, expression 4043 args = self._parse_csv(self._parse_range) 4044 4045 if len(args) > 1: 4046 if not self.LOG_BASE_FIRST: 4047 args.reverse() 4048 return exp.Log.from_arg_list(args) 4049 4050 return self.expression( 4051 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4052 ) 4053 4054 def _parse_match_against(self) -> exp.MatchAgainst: 4055 expressions = self._parse_csv(self._parse_column) 4056 4057 self._match_text_seq(")", "AGAINST", "(") 4058 4059 this = self._parse_string() 4060 4061 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4062 modifier = "IN NATURAL LANGUAGE MODE" 4063 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4064 modifier = f"{modifier} WITH QUERY EXPANSION" 4065 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4066 modifier = "IN BOOLEAN MODE" 4067 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4068 modifier = "WITH QUERY EXPANSION" 4069 
else: 4070 modifier = None 4071 4072 return self.expression( 4073 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4074 ) 4075 4076 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4077 def _parse_open_json(self) -> exp.OpenJSON: 4078 this = self._parse_bitwise() 4079 path = self._match(TokenType.COMMA) and self._parse_string() 4080 4081 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4082 this = self._parse_field(any_token=True) 4083 kind = self._parse_types() 4084 path = self._parse_string() 4085 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4086 4087 return self.expression( 4088 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4089 ) 4090 4091 expressions = None 4092 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4093 self._match_l_paren() 4094 expressions = self._parse_csv(_parse_open_json_column_def) 4095 4096 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4097 4098 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4099 args = self._parse_csv(self._parse_bitwise) 4100 4101 if self._match(TokenType.IN): 4102 return self.expression( 4103 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4104 ) 4105 4106 if haystack_first: 4107 haystack = seq_get(args, 0) 4108 needle = seq_get(args, 1) 4109 else: 4110 needle = seq_get(args, 0) 4111 haystack = seq_get(args, 1) 4112 4113 return self.expression( 4114 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4115 ) 4116 4117 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4118 args = self._parse_csv(self._parse_table) 4119 return exp.JoinHint(this=func_name.upper(), expressions=args) 4120 4121 def _parse_substring(self) -> exp.Substring: 4122 # Postgres supports the form: substring(string [from int] [for int]) 4123 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4124 4125 args = self._parse_csv(self._parse_bitwise) 4126 4127 if self._match(TokenType.FROM): 4128 args.append(self._parse_bitwise()) 4129 if self._match(TokenType.FOR): 4130 args.append(self._parse_bitwise()) 4131 4132 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4133 4134 def _parse_trim(self) -> exp.Trim: 4135 # https://www.w3resource.com/sql/character-functions/trim.php 4136 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4137 4138 position = None 4139 collation = None 4140 4141 if self._match_texts(self.TRIM_TYPES): 4142 position = self._prev.text.upper() 4143 4144 expression = self._parse_bitwise() 4145 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4146 this = self._parse_bitwise() 4147 else: 4148 this = expression 4149 expression = None 4150 4151 if self._match(TokenType.COLLATE): 4152 collation = self._parse_bitwise() 4153 4154 return self.expression( 4155 exp.Trim, this=this, position=position, expression=expression, collation=collation 4156 ) 4157 4158 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4159 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4160 4161 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4162 return self._parse_window(self._parse_id_var(), alias=True) 4163 4164 def _parse_respect_or_ignore_nulls( 4165 self, this: t.Optional[exp.Expression] 4166 ) -> t.Optional[exp.Expression]: 4167 if self._match_text_seq("IGNORE", "NULLS"): 4168 return 
self.expression(exp.IgnoreNulls, this=this) 4169 if self._match_text_seq("RESPECT", "NULLS"): 4170 return self.expression(exp.RespectNulls, this=this) 4171 return this 4172 4173 def _parse_window( 4174 self, this: t.Optional[exp.Expression], alias: bool = False 4175 ) -> t.Optional[exp.Expression]: 4176 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4177 self._match(TokenType.WHERE) 4178 this = self.expression( 4179 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4180 ) 4181 self._match_r_paren() 4182 4183 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4184 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4185 if self._match_text_seq("WITHIN", "GROUP"): 4186 order = self._parse_wrapped(self._parse_order) 4187 this = self.expression(exp.WithinGroup, this=this, expression=order) 4188 4189 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4190 # Some dialects choose to implement and some do not. 4191 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4192 4193 # There is some code above in _parse_lambda that handles 4194 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4195 4196 # The below changes handle 4197 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4198 4199 # Oracle allows both formats 4200 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4201 # and Snowflake chose to do the same for familiarity 4202 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4203 this = self._parse_respect_or_ignore_nulls(this) 4204 4205 # bigquery select from window x AS (partition by ...) 4206 if alias: 4207 over = None 4208 self._match(TokenType.ALIAS) 4209 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4210 return this 4211 else: 4212 over = self._prev.text.upper() 4213 4214 if not self._match(TokenType.L_PAREN): 4215 return self.expression( 4216 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4217 ) 4218 4219 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4220 4221 first = self._match(TokenType.FIRST) 4222 if self._match_text_seq("LAST"): 4223 first = False 4224 4225 partition = self._parse_partition_by() 4226 order = self._parse_order() 4227 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4228 4229 if kind: 4230 self._match(TokenType.BETWEEN) 4231 start = self._parse_window_spec() 4232 self._match(TokenType.AND) 4233 end = self._parse_window_spec() 4234 4235 spec = self.expression( 4236 exp.WindowSpec, 4237 kind=kind, 4238 start=start["value"], 4239 start_side=start["side"], 4240 end=end["value"], 4241 end_side=end["side"], 4242 ) 4243 else: 4244 spec = None 4245 4246 self._match_r_paren() 4247 4248 window = self.expression( 4249 exp.Window, 4250 this=this, 4251 partition_by=partition, 4252 order=order, 4253 spec=spec, 4254 alias=window_alias, 4255 over=over, 4256 first=first, 4257 ) 4258 4259 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
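# (Editor's note, not in the original source) Concretely: in dialects whose
# WINDOW_BEFORE_PAREN_TOKENS includes KEEP, an input such as
#   MAX(x) KEEP (DENSE_RANK FIRST ORDER BY y) OVER (PARTITION BY z)
# first parses the KEEP clause as a window, and the check below then sees the
# trailing OVER token and recurses so both clauses attach to the same call.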
4260 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4261 return self._parse_window(window, alias=alias) 4262 4263 return window 4264 4265 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4266 self._match(TokenType.BETWEEN) 4267 4268 return { 4269 "value": ( 4270 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4271 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4272 or self._parse_bitwise() 4273 ), 4274 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4275 } 4276 4277 def _parse_alias( 4278 self, this: t.Optional[exp.Expression], explicit: bool = False 4279 ) -> t.Optional[exp.Expression]: 4280 any_token = self._match(TokenType.ALIAS) 4281 4282 if explicit and not any_token: 4283 return this 4284 4285 if self._match(TokenType.L_PAREN): 4286 aliases = self.expression( 4287 exp.Aliases, 4288 this=this, 4289 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4290 ) 4291 self._match_r_paren(aliases) 4292 return aliases 4293 4294 alias = self._parse_id_var(any_token) 4295 4296 if alias: 4297 return self.expression(exp.Alias, this=this, alias=alias) 4298 4299 return this 4300 4301 def _parse_id_var( 4302 self, 4303 any_token: bool = True, 4304 tokens: t.Optional[t.Collection[TokenType]] = None, 4305 ) -> t.Optional[exp.Expression]: 4306 identifier = self._parse_identifier() 4307 4308 if identifier: 4309 return identifier 4310 4311 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4312 quoted = self._prev.token_type == TokenType.STRING 4313 return exp.Identifier(this=self._prev.text, quoted=quoted) 4314 4315 return None 4316 4317 def _parse_string(self) -> t.Optional[exp.Expression]: 4318 if self._match(TokenType.STRING): 4319 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4320 return self._parse_placeholder() 4321 4322 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4323 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4324 4325 def _parse_number(self) -> t.Optional[exp.Expression]: 4326 if self._match(TokenType.NUMBER): 4327 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4328 return self._parse_placeholder() 4329 4330 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4331 if self._match(TokenType.IDENTIFIER): 4332 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4333 return self._parse_placeholder() 4334 4335 def _parse_var( 4336 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4337 ) -> t.Optional[exp.Expression]: 4338 if ( 4339 (any_token and self._advance_any()) 4340 or self._match(TokenType.VAR) 4341 or (self._match_set(tokens) if tokens else False) 4342 ): 4343 return self.expression(exp.Var, this=self._prev.text) 4344 return self._parse_placeholder() 4345 4346 def _advance_any(self) -> t.Optional[Token]: 4347 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4348 self._advance() 4349 return self._prev 4350 return None 4351 4352 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4353 return self._parse_var() or self._parse_string() 4354 4355 def _parse_null(self) -> t.Optional[exp.Expression]: 4356 if self._match(TokenType.NULL): 4357 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4358 return self._parse_placeholder() 4359 4360 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4361 if self._match(TokenType.TRUE): 4362 return 
self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4363 if self._match(TokenType.FALSE): 4364 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4365 return self._parse_placeholder() 4366 4367 def _parse_star(self) -> t.Optional[exp.Expression]: 4368 if self._match(TokenType.STAR): 4369 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4370 return self._parse_placeholder() 4371 4372 def _parse_parameter(self) -> exp.Parameter: 4373 wrapped = self._match(TokenType.L_BRACE) 4374 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4375 self._match(TokenType.R_BRACE) 4376 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4377 4378 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4379 if self._match_set(self.PLACEHOLDER_PARSERS): 4380 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4381 if placeholder: 4382 return placeholder 4383 self._advance(-1) 4384 return None 4385 4386 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4387 if not self._match(TokenType.EXCEPT): 4388 return None 4389 if self._match(TokenType.L_PAREN, advance=False): 4390 return self._parse_wrapped_csv(self._parse_column) 4391 return self._parse_csv(self._parse_column) 4392 4393 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4394 if not self._match(TokenType.REPLACE): 4395 return None 4396 if self._match(TokenType.L_PAREN, advance=False): 4397 return self._parse_wrapped_csv(self._parse_expression) 4398 return self._parse_expressions() 4399 4400 def _parse_csv( 4401 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4402 ) -> t.List[t.Optional[exp.Expression]]: 4403 parse_result = parse_method() 4404 items = [parse_result] if parse_result is not None else [] 4405 4406 while self._match(sep): 4407 self._add_comments(parse_result) 4408 parse_result = parse_method() 4409 if parse_result is not None: 4410 items.append(parse_result) 4411 4412 return items 4413 4414 def _parse_tokens( 4415 self, parse_method: t.Callable, expressions: t.Dict 4416 ) -> t.Optional[exp.Expression]: 4417 this = parse_method() 4418 4419 while self._match_set(expressions): 4420 this = self.expression( 4421 expressions[self._prev.token_type], 4422 this=this, 4423 comments=self._prev_comments, 4424 expression=parse_method(), 4425 ) 4426 4427 return this 4428 4429 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4430 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4431 4432 def _parse_wrapped_csv( 4433 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4434 ) -> t.List[t.Optional[exp.Expression]]: 4435 return self._parse_wrapped( 4436 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4437 ) 4438 4439 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4440 wrapped = self._match(TokenType.L_PAREN) 4441 if not wrapped and not optional: 4442 self.raise_error("Expecting (") 4443 parse_result = parse_method() 4444 if wrapped: 4445 self._match_r_paren() 4446 return parse_result 4447 4448 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4449 return self._parse_csv(self._parse_expression) 4450 4451 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4452 return self._parse_select() or self._parse_set_operations( 4453 self._parse_expression() if alias else self._parse_conjunction() 4454 ) 
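# (Editor's sketch, not part of the original source) The helpers above,
# _parse_csv / _parse_wrapped_csv / _parse_wrapped, drive most comma-separated
# list parsing. For instance:
#
#   import sqlglot
#   from sqlglot import exp
#
#   func = sqlglot.parse_one("SELECT COALESCE(a, b, c)").find(exp.Coalesce)
#   print(func.this, func.expressions)  # first argument, then the rest
#
# Separators are consumed by _parse_csv and never surface in the tree; the
# exact repr of the parsed nodes is version-dependent.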
4455 4456 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4457 return self._parse_query_modifiers( 4458 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4459 ) 4460 4461 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4462 this = None 4463 if self._match_texts(self.TRANSACTION_KIND): 4464 this = self._prev.text 4465 4466 self._match_texts({"TRANSACTION", "WORK"}) 4467 4468 modes = [] 4469 while True: 4470 mode = [] 4471 while self._match(TokenType.VAR): 4472 mode.append(self._prev.text) 4473 4474 if mode: 4475 modes.append(" ".join(mode)) 4476 if not self._match(TokenType.COMMA): 4477 break 4478 4479 return self.expression(exp.Transaction, this=this, modes=modes) 4480 4481 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4482 chain = None 4483 savepoint = None 4484 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4485 4486 self._match_texts({"TRANSACTION", "WORK"}) 4487 4488 if self._match_text_seq("TO"): 4489 self._match_text_seq("SAVEPOINT") 4490 savepoint = self._parse_id_var() 4491 4492 if self._match(TokenType.AND): 4493 chain = not self._match_text_seq("NO") 4494 self._match_text_seq("CHAIN") 4495 4496 if is_rollback: 4497 return self.expression(exp.Rollback, savepoint=savepoint) 4498 4499 return self.expression(exp.Commit, chain=chain) 4500 4501 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4502 if not self._match_text_seq("ADD"): 4503 return None 4504 4505 self._match(TokenType.COLUMN) 4506 exists_column = self._parse_exists(not_=True) 4507 expression = self._parse_column_def(self._parse_field(any_token=True)) 4508 4509 if expression: 4510 expression.set("exists", exists_column) 4511 4512 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4513 if self._match_texts(("FIRST", "AFTER")): 4514 position = self._prev.text 4515 column_position = self.expression( 4516 exp.ColumnPosition, this=self._parse_column(), position=position 4517 ) 4518 expression.set("position", column_position) 4519 4520 return expression 4521 4522 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4523 drop = self._match(TokenType.DROP) and self._parse_drop() 4524 if drop and not isinstance(drop, exp.Command): 4525 drop.set("kind", drop.args.get("kind", "COLUMN")) 4526 return drop 4527 4528 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4529 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4530 return self.expression( 4531 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4532 ) 4533 4534 def _parse_add_constraint(self) -> exp.AddConstraint: 4535 this = None 4536 kind = self._prev.token_type 4537 4538 if kind == TokenType.CONSTRAINT: 4539 this = self._parse_id_var() 4540 4541 if self._match_text_seq("CHECK"): 4542 expression = self._parse_wrapped(self._parse_conjunction) 4543 enforced = self._match_text_seq("ENFORCED") 4544 4545 return self.expression( 4546 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4547 ) 4548 4549 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4550 expression = self._parse_foreign_key() 4551 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4552 expression = self._parse_primary_key() 4553 else: 4554 expression = None 4555 4556 return self.expression(exp.AddConstraint, this=this, expression=expression) 4557 4558 def _parse_alter_table_add(self) -> 
t.List[t.Optional[exp.Expression]]: 4559 index = self._index - 1 4560 4561 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4562 return self._parse_csv(self._parse_add_constraint) 4563 4564 self._retreat(index) 4565 return self._parse_csv(self._parse_add_column) 4566 4567 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4568 self._match(TokenType.COLUMN) 4569 column = self._parse_field(any_token=True) 4570 4571 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4572 return self.expression(exp.AlterColumn, this=column, drop=True) 4573 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4574 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4575 4576 self._match_text_seq("SET", "DATA") 4577 return self.expression( 4578 exp.AlterColumn, 4579 this=column, 4580 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4581 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4582 using=self._match(TokenType.USING) and self._parse_conjunction(), 4583 ) 4584 4585 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4586 index = self._index - 1 4587 4588 partition_exists = self._parse_exists() 4589 if self._match(TokenType.PARTITION, advance=False): 4590 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4591 4592 self._retreat(index) 4593 return self._parse_csv(self._parse_drop_column) 4594 4595 def _parse_alter_table_rename(self) -> exp.RenameTable: 4596 self._match_text_seq("TO") 4597 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4598 4599 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4600 start = self._prev 4601 4602 if not self._match(TokenType.TABLE): 4603 return self._parse_as_command(start) 4604 4605 exists = self._parse_exists() 4606 this = self._parse_table(schema=True) 4607 4608 if self._next: 4609 self._advance() 4610 4611 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4612 if parser: 4613 actions = ensure_list(parser(self)) 4614 4615 if not self._curr: 4616 return self.expression( 4617 exp.AlterTable, 4618 this=this, 4619 exists=exists, 4620 actions=actions, 4621 ) 4622 return self._parse_as_command(start) 4623 4624 def _parse_merge(self) -> exp.Merge: 4625 self._match(TokenType.INTO) 4626 target = self._parse_table() 4627 4628 if target and self._match(TokenType.ALIAS, advance=False): 4629 target.set("alias", self._parse_table_alias()) 4630 4631 self._match(TokenType.USING) 4632 using = self._parse_table() 4633 4634 self._match(TokenType.ON) 4635 on = self._parse_conjunction() 4636 4637 whens = [] 4638 while self._match(TokenType.WHEN): 4639 matched = not self._match(TokenType.NOT) 4640 self._match_text_seq("MATCHED") 4641 source = ( 4642 False 4643 if self._match_text_seq("BY", "TARGET") 4644 else self._match_text_seq("BY", "SOURCE") 4645 ) 4646 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4647 4648 self._match(TokenType.THEN) 4649 4650 if self._match(TokenType.INSERT): 4651 _this = self._parse_star() 4652 if _this: 4653 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4654 else: 4655 then = self.expression( 4656 exp.Insert, 4657 this=self._parse_value(), 4658 expression=self._match(TokenType.VALUES) and self._parse_value(), 4659 ) 4660 elif self._match(TokenType.UPDATE): 4661 expressions = self._parse_star() 4662 if expressions: 4663 then = self.expression(exp.Update, expressions=expressions) 4664 else: 4665 then = self.expression( 4666 
exp.Update, 4667 expressions=self._match(TokenType.SET) 4668 and self._parse_csv(self._parse_equality), 4669 ) 4670 elif self._match(TokenType.DELETE): 4671 then = self.expression(exp.Var, this=self._prev.text) 4672 else: 4673 then = None 4674 4675 whens.append( 4676 self.expression( 4677 exp.When, 4678 matched=matched, 4679 source=source, 4680 condition=condition, 4681 then=then, 4682 ) 4683 ) 4684 4685 return self.expression( 4686 exp.Merge, 4687 this=target, 4688 using=using, 4689 on=on, 4690 expressions=whens, 4691 ) 4692 4693 def _parse_show(self) -> t.Optional[exp.Expression]: 4694 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4695 if parser: 4696 return parser(self) 4697 self._advance() 4698 return self.expression(exp.Show, this=self._prev.text.upper()) 4699 4700 def _parse_set_item_assignment( 4701 self, kind: t.Optional[str] = None 4702 ) -> t.Optional[exp.Expression]: 4703 index = self._index 4704 4705 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4706 return self._parse_set_transaction(global_=kind == "GLOBAL") 4707 4708 left = self._parse_primary() or self._parse_id_var() 4709 4710 if not self._match_texts(("=", "TO")): 4711 self._retreat(index) 4712 return None 4713 4714 right = self._parse_statement() or self._parse_id_var() 4715 this = self.expression(exp.EQ, this=left, expression=right) 4716 4717 return self.expression(exp.SetItem, this=this, kind=kind) 4718 4719 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4720 self._match_text_seq("TRANSACTION") 4721 characteristics = self._parse_csv( 4722 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4723 ) 4724 return self.expression( 4725 exp.SetItem, 4726 expressions=characteristics, 4727 kind="TRANSACTION", 4728 **{"global": global_}, # type: ignore 4729 ) 4730 4731 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4732 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4733 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4734 4735 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4736 index = self._index 4737 set_ = self.expression( 4738 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4739 ) 4740 4741 if self._curr: 4742 self._retreat(index) 4743 return self._parse_as_command(self._prev) 4744 4745 return set_ 4746 4747 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4748 for option in options: 4749 if self._match_text_seq(*option.split(" ")): 4750 return exp.var(option) 4751 return None 4752 4753 def _parse_as_command(self, start: Token) -> exp.Command: 4754 while self._curr: 4755 self._advance() 4756 text = self._find_sql(start, self._prev) 4757 size = len(start.text) 4758 return exp.Command(this=text[:size], expression=text[size:]) 4759 4760 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4761 settings = [] 4762 4763 self._match_l_paren() 4764 kind = self._parse_id_var() 4765 4766 if self._match(TokenType.L_PAREN): 4767 while True: 4768 key = self._parse_id_var() 4769 value = self._parse_primary() 4770 4771 if not key and value is None: 4772 break 4773 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4774 self._match(TokenType.R_PAREN) 4775 4776 self._match_r_paren() 4777 4778 return self.expression( 4779 exp.DictProperty, 4780 this=this, 4781 kind=kind.this if kind else None, 4782 settings=settings, 4783 ) 4784 4785 def 
_parse_dict_range(self, this: str) -> exp.DictRange: 4786 self._match_l_paren() 4787 has_min = self._match_text_seq("MIN") 4788 if has_min: 4789 min = self._parse_var() or self._parse_primary() 4790 self._match_text_seq("MAX") 4791 max = self._parse_var() or self._parse_primary() 4792 else: 4793 max = self._parse_var() or self._parse_primary() 4794 min = exp.Literal.number(0) 4795 self._match_r_paren() 4796 return self.expression(exp.DictRange, this=this, min=min, max=max) 4797 4798 def _find_parser( 4799 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4800 ) -> t.Optional[t.Callable]: 4801 if not self._curr: 4802 return None 4803 4804 index = self._index 4805 this = [] 4806 while True: 4807 # The current token might be multiple words 4808 curr = self._curr.text.upper() 4809 key = curr.split(" ") 4810 this.append(curr) 4811 4812 self._advance() 4813 result, trie = in_trie(trie, key) 4814 if result == TrieResult.FAILED: 4815 break 4816 4817 if result == TrieResult.EXISTS: 4818 subparser = parsers[" ".join(this)] 4819 return subparser 4820 4821 self._retreat(index) 4822 return None 4823 4824 def _match(self, token_type, advance=True, expression=None): 4825 if not self._curr: 4826 return None 4827 4828 if self._curr.token_type == token_type: 4829 if advance: 4830 self._advance() 4831 self._add_comments(expression) 4832 return True 4833 4834 return None 4835 4836 def _match_set(self, types, advance=True): 4837 if not self._curr: 4838 return None 4839 4840 if self._curr.token_type in types: 4841 if advance: 4842 self._advance() 4843 return True 4844 4845 return None 4846 4847 def _match_pair(self, token_type_a, token_type_b, advance=True): 4848 if not self._curr or not self._next: 4849 return None 4850 4851 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4852 if advance: 4853 self._advance(2) 4854 return True 4855 4856 return None 4857 4858 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4859 if not self._match(TokenType.L_PAREN, expression=expression): 4860 self.raise_error("Expecting (") 4861 4862 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4863 if not self._match(TokenType.R_PAREN, expression=expression): 4864 self.raise_error("Expecting )") 4865 4866 def _match_texts(self, texts, advance=True): 4867 if self._curr and self._curr.text.upper() in texts: 4868 if advance: 4869 self._advance() 4870 return True 4871 return False 4872 4873 def _match_text_seq(self, *texts, advance=True): 4874 index = self._index 4875 for text in texts: 4876 if self._curr and self._curr.text.upper() == text: 4877 self._advance() 4878 else: 4879 self._retreat(index) 4880 return False 4881 4882 if not advance: 4883 self._retreat(index) 4884 4885 return True 4886 4887 @t.overload 4888 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4889 ... 4890 4891 @t.overload 4892 def _replace_columns_with_dots( 4893 self, this: t.Optional[exp.Expression] 4894 ) -> t.Optional[exp.Expression]: 4895 ... 
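# (Editor's note, not in the original source) The implementation below
# rewrites exp.Column nodes into nested exp.Dot chains: a column that carries
# a "table" part becomes Dot(this=table, expression=column_name), and children
# are rewritten first so multi-part references are flattened consistently.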
4896 4897 def _replace_columns_with_dots(self, this): 4898 if isinstance(this, exp.Dot): 4899 exp.replace_children(this, self._replace_columns_with_dots) 4900 elif isinstance(this, exp.Column): 4901 exp.replace_children(this, self._replace_columns_with_dots) 4902 table = this.args.get("table") 4903 this = ( 4904 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4905 ) 4906 4907 return this 4908 4909 def _replace_lambda( 4910 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4911 ) -> t.Optional[exp.Expression]: 4912 if not node: 4913 return node 4914 4915 for column in node.find_all(exp.Column): 4916 if column.parts[0].name in lambda_variables: 4917 dot_or_id = column.to_dot() if column.table else column.this 4918 parent = column.parent 4919 4920 while isinstance(parent, exp.Dot): 4921 if not isinstance(parent.parent, exp.Dot): 4922 parent.replace(dot_or_id) 4923 break 4924 parent = parent.parent 4925 else: 4926 if column is node: 4927 node = dot_or_id 4928 else: 4929 column.replace(dot_or_id) 4930 return node
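To make a few of the behaviors above concrete, here is a small usage sketch against sqlglot's public parse_one/transpile entry points (illustrative only; exact AST reprs and generated SQL formatting can vary between releases):

import sqlglot

# _parse_window: the post-parenthesis IGNORE NULLS placement discussed above
# survives a parse/generate round trip.
print(sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t").sql())

# _parse_string_agg: WITHIN GROUP (ORDER BY ...) is folded into a
# GROUP_CONCAT-style node, so the call can be transpiled to dialects that
# lack WITHIN GROUP, roughly:
#   SELECT GROUP_CONCAT(x ORDER BY x SEPARATOR ',') FROM t
print(
    sqlglot.transpile(
        "SELECT STRING_AGG(x, ',') WITHIN GROUP (ORDER BY x) FROM t",
        read="tsql",
        write="mysql",
    )[0]
)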
21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 )
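Since parse_var_map's source is shown directly above, its pairing behavior can be sketched by building the argument list by hand (the parser normally assembles this list while parsing a VAR_MAP call):

from sqlglot import exp
from sqlglot.parser import parse_var_map

# Alternating key/value arguments are split into two parallel arrays.
node = parse_var_map(
    [
        exp.Literal.string("a"),
        exp.Literal.number(1),
        exp.Literal.string("b"),
        exp.Literal.number(2),
    ]
)
print(node.args["keys"].expressions)    # the key literals: 'a', 'b'
print(node.args["values"].expressions)  # the value literals: 1, 2
# A single star argument would instead yield an exp.StarMap node.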
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.FIXEDSTRING, 141 TokenType.FLOAT, 142 TokenType.DOUBLE, 143 TokenType.CHAR, 144 TokenType.NCHAR, 145 TokenType.VARCHAR, 146 TokenType.NVARCHAR, 147 TokenType.TEXT, 148 TokenType.MEDIUMTEXT, 149 TokenType.LONGTEXT, 150 TokenType.MEDIUMBLOB, 151 TokenType.LONGBLOB, 152 TokenType.BINARY, 153 TokenType.VARBINARY, 154 TokenType.JSON, 155 TokenType.JSONB, 156 TokenType.INTERVAL, 157 TokenType.TIME, 158 TokenType.TIMETZ, 159 TokenType.TIMESTAMP, 160 TokenType.TIMESTAMPTZ, 161 TokenType.TIMESTAMPLTZ, 162 TokenType.DATETIME, 163 TokenType.DATETIME64, 164 TokenType.DATE, 165 TokenType.INT4RANGE, 166 TokenType.INT4MULTIRANGE, 167 TokenType.INT8RANGE, 168 TokenType.INT8MULTIRANGE, 169 TokenType.NUMRANGE, 170 TokenType.NUMMULTIRANGE, 171 TokenType.TSRANGE, 172 TokenType.TSMULTIRANGE, 173 TokenType.TSTZRANGE, 174 TokenType.TSTZMULTIRANGE, 175 TokenType.DATERANGE, 176 TokenType.DATEMULTIRANGE, 177 TokenType.DECIMAL, 178 TokenType.BIGDECIMAL, 179 TokenType.UUID, 180 TokenType.GEOGRAPHY, 181 TokenType.GEOMETRY, 182 TokenType.HLLSKETCH, 183 TokenType.HSTORE, 184 TokenType.PSEUDO_TYPE, 185 TokenType.SUPER, 186 TokenType.SERIAL, 187 TokenType.SMALLSERIAL, 188 TokenType.BIGSERIAL, 189 TokenType.XML, 190 TokenType.UNIQUEIDENTIFIER, 191 TokenType.USERDEFINED, 192 TokenType.MONEY, 193 
TokenType.SMALLMONEY, 194 TokenType.ROWVERSION, 195 TokenType.IMAGE, 196 TokenType.VARIANT, 197 TokenType.OBJECT, 198 TokenType.INET, 199 TokenType.IPADDRESS, 200 TokenType.IPPREFIX, 201 *ENUM_TYPE_TOKENS, 202 *NESTED_TYPE_TOKENS, 203 } 204 205 SUBQUERY_PREDICATES = { 206 TokenType.ANY: exp.Any, 207 TokenType.ALL: exp.All, 208 TokenType.EXISTS: exp.Exists, 209 TokenType.SOME: exp.Any, 210 } 211 212 RESERVED_KEYWORDS = { 213 *Tokenizer.SINGLE_TOKENS.values(), 214 TokenType.SELECT, 215 } 216 217 DB_CREATABLES = { 218 TokenType.DATABASE, 219 TokenType.SCHEMA, 220 TokenType.TABLE, 221 TokenType.VIEW, 222 TokenType.DICTIONARY, 223 } 224 225 CREATABLES = { 226 TokenType.COLUMN, 227 TokenType.FUNCTION, 228 TokenType.INDEX, 229 TokenType.PROCEDURE, 230 *DB_CREATABLES, 231 } 232 233 # Tokens that can represent identifiers 234 ID_VAR_TOKENS = { 235 TokenType.VAR, 236 TokenType.ANTI, 237 TokenType.APPLY, 238 TokenType.ASC, 239 TokenType.AUTO_INCREMENT, 240 TokenType.BEGIN, 241 TokenType.CACHE, 242 TokenType.CASE, 243 TokenType.COLLATE, 244 TokenType.COMMAND, 245 TokenType.COMMENT, 246 TokenType.COMMIT, 247 TokenType.CONSTRAINT, 248 TokenType.DEFAULT, 249 TokenType.DELETE, 250 TokenType.DESC, 251 TokenType.DESCRIBE, 252 TokenType.DICTIONARY, 253 TokenType.DIV, 254 TokenType.END, 255 TokenType.EXECUTE, 256 TokenType.ESCAPE, 257 TokenType.FALSE, 258 TokenType.FIRST, 259 TokenType.FILTER, 260 TokenType.FORMAT, 261 TokenType.FULL, 262 TokenType.IS, 263 TokenType.ISNULL, 264 TokenType.INTERVAL, 265 TokenType.KEEP, 266 TokenType.LEFT, 267 TokenType.LOAD, 268 TokenType.MERGE, 269 TokenType.NATURAL, 270 TokenType.NEXT, 271 TokenType.OFFSET, 272 TokenType.ORDINALITY, 273 TokenType.OVERWRITE, 274 TokenType.PARTITION, 275 TokenType.PERCENT, 276 TokenType.PIVOT, 277 TokenType.PRAGMA, 278 TokenType.RANGE, 279 TokenType.REFERENCES, 280 TokenType.RIGHT, 281 TokenType.ROW, 282 TokenType.ROWS, 283 TokenType.SEMI, 284 TokenType.SET, 285 TokenType.SETTINGS, 286 TokenType.SHOW, 287 TokenType.TEMPORARY, 288 TokenType.TOP, 289 TokenType.TRUE, 290 TokenType.UNIQUE, 291 TokenType.UNPIVOT, 292 TokenType.UPDATE, 293 TokenType.VOLATILE, 294 TokenType.WINDOW, 295 *CREATABLES, 296 *SUBQUERY_PREDICATES, 297 *TYPE_TOKENS, 298 *NO_PAREN_FUNCTIONS, 299 } 300 301 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 302 303 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 304 TokenType.APPLY, 305 TokenType.ASOF, 306 TokenType.FULL, 307 TokenType.LEFT, 308 TokenType.LOCK, 309 TokenType.NATURAL, 310 TokenType.OFFSET, 311 TokenType.RIGHT, 312 TokenType.WINDOW, 313 } 314 315 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 316 317 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 318 319 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 320 321 FUNC_TOKENS = { 322 TokenType.COMMAND, 323 TokenType.CURRENT_DATE, 324 TokenType.CURRENT_DATETIME, 325 TokenType.CURRENT_TIMESTAMP, 326 TokenType.CURRENT_TIME, 327 TokenType.CURRENT_USER, 328 TokenType.FILTER, 329 TokenType.FIRST, 330 TokenType.FORMAT, 331 TokenType.GLOB, 332 TokenType.IDENTIFIER, 333 TokenType.INDEX, 334 TokenType.ISNULL, 335 TokenType.ILIKE, 336 TokenType.LIKE, 337 TokenType.MERGE, 338 TokenType.OFFSET, 339 TokenType.PRIMARY_KEY, 340 TokenType.RANGE, 341 TokenType.REPLACE, 342 TokenType.RLIKE, 343 TokenType.ROW, 344 TokenType.UNNEST, 345 TokenType.VAR, 346 TokenType.LEFT, 347 TokenType.RIGHT, 348 TokenType.DATE, 349 TokenType.DATETIME, 350 TokenType.TABLE, 351 TokenType.TIMESTAMP, 352 TokenType.TIMESTAMPTZ, 353 TokenType.WINDOW, 354 TokenType.XOR, 355 *TYPE_TOKENS, 356 
*SUBQUERY_PREDICATES, 357 } 358 359 CONJUNCTION = { 360 TokenType.AND: exp.And, 361 TokenType.OR: exp.Or, 362 } 363 364 EQUALITY = { 365 TokenType.EQ: exp.EQ, 366 TokenType.NEQ: exp.NEQ, 367 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 368 } 369 370 COMPARISON = { 371 TokenType.GT: exp.GT, 372 TokenType.GTE: exp.GTE, 373 TokenType.LT: exp.LT, 374 TokenType.LTE: exp.LTE, 375 } 376 377 BITWISE = { 378 TokenType.AMP: exp.BitwiseAnd, 379 TokenType.CARET: exp.BitwiseXor, 380 TokenType.PIPE: exp.BitwiseOr, 381 TokenType.DPIPE: exp.DPipe, 382 } 383 384 TERM = { 385 TokenType.DASH: exp.Sub, 386 TokenType.PLUS: exp.Add, 387 TokenType.MOD: exp.Mod, 388 TokenType.COLLATE: exp.Collate, 389 } 390 391 FACTOR = { 392 TokenType.DIV: exp.IntDiv, 393 TokenType.LR_ARROW: exp.Distance, 394 TokenType.SLASH: exp.Div, 395 TokenType.STAR: exp.Mul, 396 } 397 398 TIMES = { 399 TokenType.TIME, 400 TokenType.TIMETZ, 401 } 402 403 TIMESTAMPS = { 404 TokenType.TIMESTAMP, 405 TokenType.TIMESTAMPTZ, 406 TokenType.TIMESTAMPLTZ, 407 *TIMES, 408 } 409 410 SET_OPERATIONS = { 411 TokenType.UNION, 412 TokenType.INTERSECT, 413 TokenType.EXCEPT, 414 } 415 416 JOIN_METHODS = { 417 TokenType.NATURAL, 418 TokenType.ASOF, 419 } 420 421 JOIN_SIDES = { 422 TokenType.LEFT, 423 TokenType.RIGHT, 424 TokenType.FULL, 425 } 426 427 JOIN_KINDS = { 428 TokenType.INNER, 429 TokenType.OUTER, 430 TokenType.CROSS, 431 TokenType.SEMI, 432 TokenType.ANTI, 433 } 434 435 JOIN_HINTS: t.Set[str] = set() 436 437 LAMBDAS = { 438 TokenType.ARROW: lambda self, expressions: self.expression( 439 exp.Lambda, 440 this=self._replace_lambda( 441 self._parse_conjunction(), 442 {node.name for node in expressions}, 443 ), 444 expressions=expressions, 445 ), 446 TokenType.FARROW: lambda self, expressions: self.expression( 447 exp.Kwarg, 448 this=exp.var(expressions[0].name), 449 expression=self._parse_conjunction(), 450 ), 451 } 452 453 COLUMN_OPERATORS = { 454 TokenType.DOT: None, 455 TokenType.DCOLON: lambda self, this, to: self.expression( 456 exp.Cast if self.STRICT_CAST else exp.TryCast, 457 this=this, 458 to=to, 459 ), 460 TokenType.ARROW: lambda self, this, path: self.expression( 461 exp.JSONExtract, 462 this=this, 463 expression=path, 464 ), 465 TokenType.DARROW: lambda self, this, path: self.expression( 466 exp.JSONExtractScalar, 467 this=this, 468 expression=path, 469 ), 470 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 471 exp.JSONBExtract, 472 this=this, 473 expression=path, 474 ), 475 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 476 exp.JSONBExtractScalar, 477 this=this, 478 expression=path, 479 ), 480 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 481 exp.JSONBContains, 482 this=this, 483 expression=key, 484 ), 485 } 486 487 EXPRESSION_PARSERS = { 488 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 489 exp.Column: lambda self: self._parse_column(), 490 exp.Condition: lambda self: self._parse_conjunction(), 491 exp.DataType: lambda self: self._parse_types(), 492 exp.Expression: lambda self: self._parse_statement(), 493 exp.From: lambda self: self._parse_from(), 494 exp.Group: lambda self: self._parse_group(), 495 exp.Having: lambda self: self._parse_having(), 496 exp.Identifier: lambda self: self._parse_id_var(), 497 exp.Join: lambda self: self._parse_join(), 498 exp.Lambda: lambda self: self._parse_lambda(), 499 exp.Lateral: lambda self: self._parse_lateral(), 500 exp.Limit: lambda self: self._parse_limit(), 501 exp.Offset: lambda self: self._parse_offset(), 502 exp.Order: 
lambda self: self._parse_order(), 503 exp.Ordered: lambda self: self._parse_ordered(), 504 exp.Properties: lambda self: self._parse_properties(), 505 exp.Qualify: lambda self: self._parse_qualify(), 506 exp.Returning: lambda self: self._parse_returning(), 507 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 508 exp.Table: lambda self: self._parse_table_parts(), 509 exp.TableAlias: lambda self: self._parse_table_alias(), 510 exp.Where: lambda self: self._parse_where(), 511 exp.Window: lambda self: self._parse_named_window(), 512 exp.With: lambda self: self._parse_with(), 513 "JOIN_TYPE": lambda self: self._parse_join_parts(), 514 } 515 516 STATEMENT_PARSERS = { 517 TokenType.ALTER: lambda self: self._parse_alter(), 518 TokenType.BEGIN: lambda self: self._parse_transaction(), 519 TokenType.CACHE: lambda self: self._parse_cache(), 520 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 521 TokenType.COMMENT: lambda self: self._parse_comment(), 522 TokenType.CREATE: lambda self: self._parse_create(), 523 TokenType.DELETE: lambda self: self._parse_delete(), 524 TokenType.DESC: lambda self: self._parse_describe(), 525 TokenType.DESCRIBE: lambda self: self._parse_describe(), 526 TokenType.DROP: lambda self: self._parse_drop(), 527 TokenType.FROM: lambda self: exp.select("*").from_( 528 t.cast(exp.From, self._parse_from(skip_from_token=True)) 529 ), 530 TokenType.INSERT: lambda self: self._parse_insert(), 531 TokenType.LOAD: lambda self: self._parse_load(), 532 TokenType.MERGE: lambda self: self._parse_merge(), 533 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 534 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 535 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 536 TokenType.SET: lambda self: self._parse_set(), 537 TokenType.UNCACHE: lambda self: self._parse_uncache(), 538 TokenType.UPDATE: lambda self: self._parse_update(), 539 TokenType.USE: lambda self: self.expression( 540 exp.Use, 541 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 542 and exp.var(self._prev.text), 543 this=self._parse_table(schema=False), 544 ), 545 } 546 547 UNARY_PARSERS = { 548 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 549 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 550 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 551 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 552 } 553 554 PRIMARY_PARSERS = { 555 TokenType.STRING: lambda self, token: self.expression( 556 exp.Literal, this=token.text, is_string=True 557 ), 558 TokenType.NUMBER: lambda self, token: self.expression( 559 exp.Literal, this=token.text, is_string=False 560 ), 561 TokenType.STAR: lambda self, _: self.expression( 562 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 563 ), 564 TokenType.NULL: lambda self, _: self.expression(exp.Null), 565 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 566 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 567 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 568 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 569 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 570 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 
571 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 572 exp.National, this=token.text 573 ), 574 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 575 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 576 } 577 578 PLACEHOLDER_PARSERS = { 579 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 580 TokenType.PARAMETER: lambda self: self._parse_parameter(), 581 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 582 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 583 else None, 584 } 585 586 RANGE_PARSERS = { 587 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 588 TokenType.GLOB: binary_range_parser(exp.Glob), 589 TokenType.ILIKE: binary_range_parser(exp.ILike), 590 TokenType.IN: lambda self, this: self._parse_in(this), 591 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 592 TokenType.IS: lambda self, this: self._parse_is(this), 593 TokenType.LIKE: binary_range_parser(exp.Like), 594 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 595 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 596 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 597 } 598 599 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 600 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 601 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 602 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 603 "CHARACTER SET": lambda self: self._parse_character_set(), 604 "CHECKSUM": lambda self: self._parse_checksum(), 605 "CLUSTER BY": lambda self: self._parse_cluster(), 606 "CLUSTERED": lambda self: self._parse_clustered_by(), 607 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 608 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 609 "COPY": lambda self: self._parse_copy_property(), 610 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 611 "DEFINER": lambda self: self._parse_definer(), 612 "DETERMINISTIC": lambda self: self.expression( 613 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 614 ), 615 "DISTKEY": lambda self: self._parse_distkey(), 616 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 617 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 618 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 619 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 620 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 621 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 622 "FREESPACE": lambda self: self._parse_freespace(), 623 "HEAP": lambda self: self.expression(exp.HeapProperty), 624 "IMMUTABLE": lambda self: self.expression( 625 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 626 ), 627 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 628 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 629 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 630 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 631 "LIKE": lambda self: self._parse_create_like(), 632 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 633 "LOCK": lambda self: self._parse_locking(), 634 "LOCKING": lambda self: self._parse_locking(), 635 "LOG": lambda 
self, **kwargs: self._parse_log(**kwargs), 636 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 637 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 638 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 639 "NO": lambda self: self._parse_no_property(), 640 "ON": lambda self: self._parse_on_property(), 641 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 642 "PARTITION BY": lambda self: self._parse_partitioned_by(), 643 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 644 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 645 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 646 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 647 "RETURNS": lambda self: self._parse_returns(), 648 "ROW": lambda self: self._parse_row(), 649 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 650 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 651 "SETTINGS": lambda self: self.expression( 652 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 653 ), 654 "SORTKEY": lambda self: self._parse_sortkey(), 655 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 656 "STABLE": lambda self: self.expression( 657 exp.StabilityProperty, this=exp.Literal.string("STABLE") 658 ), 659 "STORED": lambda self: self._parse_stored(), 660 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 661 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 662 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 663 "TO": lambda self: self._parse_to_table(), 664 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 665 "TTL": lambda self: self._parse_ttl(), 666 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 667 "VOLATILE": lambda self: self._parse_volatile_property(), 668 "WITH": lambda self: self._parse_with_property(), 669 } 670 671 CONSTRAINT_PARSERS = { 672 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 673 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 674 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 675 "CHARACTER SET": lambda self: self.expression( 676 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 677 ), 678 "CHECK": lambda self: self.expression( 679 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 680 ), 681 "COLLATE": lambda self: self.expression( 682 exp.CollateColumnConstraint, this=self._parse_var() 683 ), 684 "COMMENT": lambda self: self.expression( 685 exp.CommentColumnConstraint, this=self._parse_string() 686 ), 687 "COMPRESS": lambda self: self._parse_compress(), 688 "DEFAULT": lambda self: self.expression( 689 exp.DefaultColumnConstraint, this=self._parse_bitwise() 690 ), 691 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 692 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 693 "FORMAT": lambda self: self.expression( 694 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 695 ), 696 "GENERATED": lambda self: self._parse_generated_as_identity(), 697 "IDENTITY": lambda self: self._parse_auto_increment(), 698 "INLINE": lambda self: self._parse_inline(), 699 "LIKE": lambda self: self._parse_create_like(), 700 "NOT": lambda self: self._parse_not_constraint(), 701 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, 
allow_null=True), 702 "ON": lambda self: self._match(TokenType.UPDATE) 703 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 704 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 705 "PRIMARY KEY": lambda self: self._parse_primary_key(), 706 "REFERENCES": lambda self: self._parse_references(match=False), 707 "TITLE": lambda self: self.expression( 708 exp.TitleColumnConstraint, this=self._parse_var_or_string() 709 ), 710 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 711 "UNIQUE": lambda self: self._parse_unique(), 712 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 713 } 714 715 ALTER_PARSERS = { 716 "ADD": lambda self: self._parse_alter_table_add(), 717 "ALTER": lambda self: self._parse_alter_table_alter(), 718 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 719 "DROP": lambda self: self._parse_alter_table_drop(), 720 "RENAME": lambda self: self._parse_alter_table_rename(), 721 } 722 723 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 724 725 NO_PAREN_FUNCTION_PARSERS = { 726 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 727 "CASE": lambda self: self._parse_case(), 728 "IF": lambda self: self._parse_if(), 729 "NEXT": lambda self: self._parse_next_value_for(), 730 } 731 732 INVALID_FUNC_NAME_TOKENS = { 733 TokenType.IDENTIFIER, 734 TokenType.STRING, 735 } 736 737 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 738 739 FUNCTION_PARSERS = { 740 "ANY_VALUE": lambda self: self._parse_any_value(), 741 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 742 "CONCAT": lambda self: self._parse_concat(), 743 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 744 "DECODE": lambda self: self._parse_decode(), 745 "EXTRACT": lambda self: self._parse_extract(), 746 "JSON_OBJECT": lambda self: self._parse_json_object(), 747 "LOG": lambda self: self._parse_logarithm(), 748 "MATCH": lambda self: self._parse_match_against(), 749 "OPENJSON": lambda self: self._parse_open_json(), 750 "POSITION": lambda self: self._parse_position(), 751 "SAFE_CAST": lambda self: self._parse_cast(False), 752 "STRING_AGG": lambda self: self._parse_string_agg(), 753 "SUBSTRING": lambda self: self._parse_substring(), 754 "TRIM": lambda self: self._parse_trim(), 755 "TRY_CAST": lambda self: self._parse_cast(False), 756 "TRY_CONVERT": lambda self: self._parse_convert(False), 757 } 758 759 QUERY_MODIFIER_PARSERS = { 760 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 761 TokenType.WHERE: lambda self: ("where", self._parse_where()), 762 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 763 TokenType.HAVING: lambda self: ("having", self._parse_having()), 764 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 765 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 766 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 767 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 768 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 769 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 770 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 771 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 772 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 773 TokenType.USING: lambda self: ("sample", 
self._parse_table_sample(as_modifier=True)), 774 TokenType.CLUSTER_BY: lambda self: ( 775 "cluster", 776 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 777 ), 778 TokenType.DISTRIBUTE_BY: lambda self: ( 779 "distribute", 780 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 781 ), 782 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 783 } 784 785 SET_PARSERS = { 786 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 787 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 788 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 789 "TRANSACTION": lambda self: self._parse_set_transaction(), 790 } 791 792 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 793 794 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 795 796 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 797 798 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 799 800 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 801 802 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 803 TRANSACTION_CHARACTERISTICS = { 804 "ISOLATION LEVEL REPEATABLE READ", 805 "ISOLATION LEVEL READ COMMITTED", 806 "ISOLATION LEVEL READ UNCOMMITTED", 807 "ISOLATION LEVEL SERIALIZABLE", 808 "READ WRITE", 809 "READ ONLY", 810 } 811 812 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 813 814 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 815 816 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 817 818 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 819 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 820 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 821 PARTITION_BY_TOKENS = {TokenType.PARTITION_BY} 822 823 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 824 825 STRICT_CAST = True 826 827 # A NULL arg in CONCAT yields NULL by default 828 CONCAT_NULL_OUTPUTS_STRING = False 829 830 PREFIXED_PIVOT_COLUMNS = False 831 IDENTIFY_PIVOT_STRINGS = False 832 833 LOG_BASE_FIRST = True 834 LOG_DEFAULTS_TO_LN = False 835 836 __slots__ = ( 837 "error_level", 838 "error_message_context", 839 "max_errors", 840 "sql", 841 "errors", 842 "_tokens", 843 "_index", 844 "_curr", 845 "_next", 846 "_prev", 847 "_prev_comments", 848 ) 849 850 # Autofilled 851 INDEX_OFFSET: int = 0 852 UNNEST_COLUMN_ONLY: bool = False 853 ALIAS_POST_TABLESAMPLE: bool = False 854 STRICT_STRING_CONCAT = False 855 NORMALIZE_FUNCTIONS = "upper" 856 NULL_ORDERING: str = "nulls_are_small" 857 SHOW_TRIE: t.Dict = {} 858 SET_TRIE: t.Dict = {} 859 FORMAT_MAPPING: t.Dict[str, str] = {} 860 FORMAT_TRIE: t.Dict = {} 861 TIME_MAPPING: t.Dict[str, str] = {} 862 TIME_TRIE: t.Dict = {} 863 864 def __init__( 865 self, 866 error_level: t.Optional[ErrorLevel] = None, 867 error_message_context: int = 100, 868 max_errors: int = 3, 869 ): 870 self.error_level = error_level or ErrorLevel.IMMEDIATE 871 self.error_message_context = error_message_context 872 self.max_errors = max_errors 873 self.reset() 874 875 def reset(self): 876 self.sql = "" 877 self.errors = [] 878 self._tokens = [] 879 self._index = 0 880 self._curr = None 881 self._next = None 882 self._prev = None 883 self._prev_comments = None 884 885 def parse( 886 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 887 ) -> t.List[t.Optional[exp.Expression]]: 888 """ 889 Parses a list of tokens and returns a list of syntax trees, one tree 890 per parsed SQL statement. 
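        Example (an illustrative sketch added by the editor, not part of the
        original docstring; it uses only the Tokenizer imported by this module):
            >>> sql = "SELECT 1; SELECT 2"
            >>> len(Parser().parse(Tokenizer().tokenize(sql), sql))
            2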
891 892 Args: 893 raw_tokens: The list of tokens. 894 sql: The original SQL string, used to produce helpful debug messages. 895 896 Returns: 897 The list of the produced syntax trees. 898 """ 899 return self._parse( 900 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 901 ) 902 903 def parse_into( 904 self, 905 expression_types: exp.IntoType, 906 raw_tokens: t.List[Token], 907 sql: t.Optional[str] = None, 908 ) -> t.List[t.Optional[exp.Expression]]: 909 """ 910 Parses a list of tokens into a given Expression type. If a collection of Expression 911 types is given instead, this method will try to parse the token list into each one 912 of them, stopping at the first for which the parsing succeeds. 913 914 Args: 915 expression_types: The expression type(s) to try and parse the token list into. 916 raw_tokens: The list of tokens. 917 sql: The original SQL string, used to produce helpful debug messages. 918 919 Returns: 920 The list of the produced syntax trees. 921 """ 922 errors = [] 923 for expression_type in ensure_list(expression_types): 924 parser = self.EXPRESSION_PARSERS.get(expression_type) 925 if not parser: 926 raise TypeError(f"No parser registered for {expression_type}") 927 928 try: 929 return self._parse(parser, raw_tokens, sql) 930 except ParseError as e: 931 e.errors[0]["into_expression"] = expression_type 932 errors.append(e) 933 934 raise ParseError( 935 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 936 errors=merge_errors(errors), 937 ) from errors[-1] 938 939 def _parse( 940 self, 941 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 942 raw_tokens: t.List[Token], 943 sql: t.Optional[str] = None, 944 ) -> t.List[t.Optional[exp.Expression]]: 945 self.reset() 946 self.sql = sql or "" 947 948 total = len(raw_tokens) 949 chunks: t.List[t.List[Token]] = [[]] 950 951 for i, token in enumerate(raw_tokens): 952 if token.token_type == TokenType.SEMICOLON: 953 if i < total - 1: 954 chunks.append([]) 955 else: 956 chunks[-1].append(token) 957 958 expressions = [] 959 960 for tokens in chunks: 961 self._index = -1 962 self._tokens = tokens 963 self._advance() 964 965 expressions.append(parse_method(self)) 966 967 if self._index < len(self._tokens): 968 self.raise_error("Invalid expression / Unexpected token") 969 970 self.check_errors() 971 972 return expressions 973 974 def check_errors(self) -> None: 975 """Logs or raises any found errors, depending on the chosen error level setting.""" 976 if self.error_level == ErrorLevel.WARN: 977 for error in self.errors: 978 logger.error(str(error)) 979 elif self.error_level == ErrorLevel.RAISE and self.errors: 980 raise ParseError( 981 concat_messages(self.errors, self.max_errors), 982 errors=merge_errors(self.errors), 983 ) 984 985 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 986 """ 987 Appends an error to the list of recorded errors or raises it, depending on the chosen 988 error level setting. 989 """ 990 token = token or self._curr or self._prev or Token.string("") 991 start = token.start 992 end = token.end + 1 993 start_context = self.sql[max(start - self.error_message_context, 0) : start] 994 highlight = self.sql[start:end] 995 end_context = self.sql[end : end + self.error_message_context] 996 997 error = ParseError.new( 998 f"{message}.
Line {token.line}, Col: {token.col}.\n" 999 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1000 description=message, 1001 line=token.line, 1002 col=token.col, 1003 start_context=start_context, 1004 highlight=highlight, 1005 end_context=end_context, 1006 ) 1007 1008 if self.error_level == ErrorLevel.IMMEDIATE: 1009 raise error 1010 1011 self.errors.append(error) 1012 1013 def expression( 1014 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1015 ) -> E: 1016 """ 1017 Creates a new, validated Expression. 1018 1019 Args: 1020 exp_class: The expression class to instantiate. 1021 comments: An optional list of comments to attach to the expression. 1022 kwargs: The arguments to set for the expression along with their respective values. 1023 1024 Returns: 1025 The target expression. 1026 """ 1027 instance = exp_class(**kwargs) 1028 instance.add_comments(comments) if comments else self._add_comments(instance) 1029 return self.validate_expression(instance) 1030 1031 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1032 if expression and self._prev_comments: 1033 expression.add_comments(self._prev_comments) 1034 self._prev_comments = None 1035 1036 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1037 """ 1038 Validates an Expression, making sure that all its mandatory arguments are set. 1039 1040 Args: 1041 expression: The expression to validate. 1042 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1043 1044 Returns: 1045 The validated expression. 1046 """ 1047 if self.error_level != ErrorLevel.IGNORE: 1048 for error_message in expression.error_messages(args): 1049 self.raise_error(error_message) 1050 1051 return expression 1052 1053 def _find_sql(self, start: Token, end: Token) -> str: 1054 return self.sql[start.start : end.end + 1] 1055 1056 def _advance(self, times: int = 1) -> None: 1057 self._index += times 1058 self._curr = seq_get(self._tokens, self._index) 1059 self._next = seq_get(self._tokens, self._index + 1) 1060 1061 if self._index > 0: 1062 self._prev = self._tokens[self._index - 1] 1063 self._prev_comments = self._prev.comments 1064 else: 1065 self._prev = None 1066 self._prev_comments = None 1067 1068 def _retreat(self, index: int) -> None: 1069 if index != self._index: 1070 self._advance(index - self._index) 1071 1072 def _parse_command(self) -> exp.Command: 1073 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1074 1075 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1076 start = self._prev 1077 exists = self._parse_exists() if allow_exists else None 1078 1079 self._match(TokenType.ON) 1080 1081 kind = self._match_set(self.CREATABLES) and self._prev 1082 if not kind: 1083 return self._parse_as_command(start) 1084 1085 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1086 this = self._parse_user_defined_function(kind=kind.token_type) 1087 elif kind.token_type == TokenType.TABLE: 1088 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1089 elif kind.token_type == TokenType.COLUMN: 1090 this = self._parse_column() 1091 else: 1092 this = self._parse_id_var() 1093 1094 self._match(TokenType.IS) 1095 1096 return self.expression( 1097 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1098 ) 1099 1100 def _parse_to_table( 1101 self, 1102 ) -> exp.ToTableProperty: 1103 table = 
self._parse_table_parts(schema=True) 1104 return self.expression(exp.ToTableProperty, this=table) 1105 1106 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1107 def _parse_ttl(self) -> exp.Expression: 1108 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1109 this = self._parse_bitwise() 1110 1111 if self._match_text_seq("DELETE"): 1112 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1113 if self._match_text_seq("RECOMPRESS"): 1114 return self.expression( 1115 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1116 ) 1117 if self._match_text_seq("TO", "DISK"): 1118 return self.expression( 1119 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1120 ) 1121 if self._match_text_seq("TO", "VOLUME"): 1122 return self.expression( 1123 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1124 ) 1125 1126 return this 1127 1128 expressions = self._parse_csv(_parse_ttl_action) 1129 where = self._parse_where() 1130 group = self._parse_group() 1131 1132 aggregates = None 1133 if group and self._match(TokenType.SET): 1134 aggregates = self._parse_csv(self._parse_set_item) 1135 1136 return self.expression( 1137 exp.MergeTreeTTL, 1138 expressions=expressions, 1139 where=where, 1140 group=group, 1141 aggregates=aggregates, 1142 ) 1143 1144 def _parse_statement(self) -> t.Optional[exp.Expression]: 1145 if self._curr is None: 1146 return None 1147 1148 if self._match_set(self.STATEMENT_PARSERS): 1149 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1150 1151 if self._match_set(Tokenizer.COMMANDS): 1152 return self._parse_command() 1153 1154 expression = self._parse_expression() 1155 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1156 return self._parse_query_modifiers(expression) 1157 1158 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1159 start = self._prev 1160 temporary = self._match(TokenType.TEMPORARY) 1161 materialized = self._match_text_seq("MATERIALIZED") 1162 1163 kind = self._match_set(self.CREATABLES) and self._prev.text 1164 if not kind: 1165 return self._parse_as_command(start) 1166 1167 return self.expression( 1168 exp.Drop, 1169 comments=start.comments, 1170 exists=exists or self._parse_exists(), 1171 this=self._parse_table(schema=True), 1172 kind=kind, 1173 temporary=temporary, 1174 materialized=materialized, 1175 cascade=self._match_text_seq("CASCADE"), 1176 constraints=self._match_text_seq("CONSTRAINTS"), 1177 purge=self._match_text_seq("PURGE"), 1178 ) 1179 1180 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1181 return ( 1182 self._match_text_seq("IF") 1183 and (not not_ or self._match(TokenType.NOT)) 1184 and self._match(TokenType.EXISTS) 1185 ) 1186 1187 def _parse_create(self) -> exp.Create | exp.Command: 1188 # Note: this can't be None because we've matched a statement parser 1189 start = self._prev 1190 comments = self._prev_comments 1191 1192 replace = start.text.upper() == "REPLACE" or self._match_pair( 1193 TokenType.OR, TokenType.REPLACE 1194 ) 1195 unique = self._match(TokenType.UNIQUE) 1196 1197 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1198 self._advance() 1199 1200 properties = None 1201 create_token = self._match_set(self.CREATABLES) and self._prev 1202 1203 if not create_token: 1204 # exp.Properties.Location.POST_CREATE 1205 properties = self._parse_properties() 1206 create_token = self._match_set(self.CREATABLES) and 
self._prev 1207 1208 if not properties or not create_token: 1209 return self._parse_as_command(start) 1210 1211 exists = self._parse_exists(not_=True) 1212 this = None 1213 expression: t.Optional[exp.Expression] = None 1214 indexes = None 1215 no_schema_binding = None 1216 begin = None 1217 clone = None 1218 1219 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1220 nonlocal properties 1221 if properties and temp_props: 1222 properties.expressions.extend(temp_props.expressions) 1223 elif temp_props: 1224 properties = temp_props 1225 1226 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1227 this = self._parse_user_defined_function(kind=create_token.token_type) 1228 1229 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1230 extend_props(self._parse_properties()) 1231 1232 self._match(TokenType.ALIAS) 1233 1234 if self._match(TokenType.COMMAND): 1235 expression = self._parse_as_command(self._prev) 1236 else: 1237 begin = self._match(TokenType.BEGIN) 1238 return_ = self._match_text_seq("RETURN") 1239 expression = self._parse_statement() 1240 1241 if return_: 1242 expression = self.expression(exp.Return, this=expression) 1243 elif create_token.token_type == TokenType.INDEX: 1244 this = self._parse_index(index=self._parse_id_var()) 1245 elif create_token.token_type in self.DB_CREATABLES: 1246 table_parts = self._parse_table_parts(schema=True) 1247 1248 # exp.Properties.Location.POST_NAME 1249 self._match(TokenType.COMMA) 1250 extend_props(self._parse_properties(before=True)) 1251 1252 this = self._parse_schema(this=table_parts) 1253 1254 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1255 extend_props(self._parse_properties()) 1256 1257 self._match(TokenType.ALIAS) 1258 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1259 # exp.Properties.Location.POST_ALIAS 1260 extend_props(self._parse_properties()) 1261 1262 expression = self._parse_ddl_select() 1263 1264 if create_token.token_type == TokenType.TABLE: 1265 # exp.Properties.Location.POST_EXPRESSION 1266 extend_props(self._parse_properties()) 1267 1268 indexes = [] 1269 while True: 1270 index = self._parse_index() 1271 1272 # exp.Properties.Location.POST_INDEX 1273 extend_props(self._parse_properties()) 1274 1275 if not index: 1276 break 1277 else: 1278 self._match(TokenType.COMMA) 1279 indexes.append(index) 1280 elif create_token.token_type == TokenType.VIEW: 1281 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1282 no_schema_binding = True 1283 1284 if self._match_text_seq("CLONE"): 1285 clone = self._parse_table(schema=True) 1286 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1287 clone_kind = ( 1288 self._match(TokenType.L_PAREN) 1289 and self._match_texts(self.CLONE_KINDS) 1290 and self._prev.text.upper() 1291 ) 1292 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1293 self._match(TokenType.R_PAREN) 1294 clone = self.expression( 1295 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1296 ) 1297 1298 return self.expression( 1299 exp.Create, 1300 comments=comments, 1301 this=this, 1302 kind=create_token.text, 1303 replace=replace, 1304 unique=unique, 1305 expression=expression, 1306 exists=exists, 1307 properties=properties, 1308 indexes=indexes, 1309 no_schema_binding=no_schema_binding, 1310 begin=begin, 1311 clone=clone, 1312 ) 1313 1314 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1315 # only used for teradata currently 1316 
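        # Editorial sketch, not in the original source: the kwargs collected
        # below gate optional prefix keywords, and only the truthy ones are
        # forwarded to the matched PROPERTY_PARSERS entry. For Teradata's
        # "NO FALLBACK", for example, the flow is roughly:
        #   kwargs = {"no": True, "dual": None, ...}
        #   -> PROPERTY_PARSERS["FALLBACK"](self, no=True)
        #   -> self._parse_fallback(no=True) -> exp.FallbackProperty(no=True, ...)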
self._match(TokenType.COMMA) 1317 1318 kwargs = { 1319 "no": self._match_text_seq("NO"), 1320 "dual": self._match_text_seq("DUAL"), 1321 "before": self._match_text_seq("BEFORE"), 1322 "default": self._match_text_seq("DEFAULT"), 1323 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1324 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1325 "after": self._match_text_seq("AFTER"), 1326 "minimum": self._match_texts(("MIN", "MINIMUM")), 1327 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1328 } 1329 1330 if self._match_texts(self.PROPERTY_PARSERS): 1331 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1332 try: 1333 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1334 except TypeError: 1335 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1336 1337 return None 1338 1339 def _parse_property(self) -> t.Optional[exp.Expression]: 1340 if self._match_texts(self.PROPERTY_PARSERS): 1341 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1342 1343 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1344 return self._parse_character_set(default=True) 1345 1346 if self._match_text_seq("COMPOUND", "SORTKEY"): 1347 return self._parse_sortkey(compound=True) 1348 1349 if self._match_text_seq("SQL", "SECURITY"): 1350 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1351 1352 assignment = self._match_pair( 1353 TokenType.VAR, TokenType.EQ, advance=False 1354 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1355 1356 if assignment: 1357 key = self._parse_var_or_string() 1358 self._match(TokenType.EQ) 1359 return self.expression(exp.Property, this=key, value=self._parse_column()) 1360 1361 return None 1362 1363 def _parse_stored(self) -> exp.FileFormatProperty: 1364 self._match(TokenType.ALIAS) 1365 1366 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1367 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1368 1369 return self.expression( 1370 exp.FileFormatProperty, 1371 this=self.expression( 1372 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1373 ) 1374 if input_format or output_format 1375 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1376 ) 1377 1378 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1379 self._match(TokenType.EQ) 1380 self._match(TokenType.ALIAS) 1381 return self.expression(exp_class, this=self._parse_field()) 1382 1383 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1384 properties = [] 1385 while True: 1386 if before: 1387 prop = self._parse_property_before() 1388 else: 1389 prop = self._parse_property() 1390 1391 if not prop: 1392 break 1393 for p in ensure_list(prop): 1394 properties.append(p) 1395 1396 if properties: 1397 return self.expression(exp.Properties, expressions=properties) 1398 1399 return None 1400 1401 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1402 return self.expression( 1403 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1404 ) 1405 1406 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1407 if self._index >= 2: 1408 pre_volatile_token = self._tokens[self._index - 2] 1409 else: 1410 pre_volatile_token = None 1411 1412 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1413 return exp.VolatileProperty() 1414 1415 return 
self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1416 1417 def _parse_with_property( 1418 self, 1419 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1420 if self._match(TokenType.L_PAREN, advance=False): 1421 return self._parse_wrapped_csv(self._parse_property) 1422 1423 if self._match_text_seq("JOURNAL"): 1424 return self._parse_withjournaltable() 1425 1426 if self._match_text_seq("DATA"): 1427 return self._parse_withdata(no=False) 1428 elif self._match_text_seq("NO", "DATA"): 1429 return self._parse_withdata(no=True) 1430 1431 if not self._next: 1432 return None 1433 1434 return self._parse_withisolatedloading() 1435 1436 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1437 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1438 self._match(TokenType.EQ) 1439 1440 user = self._parse_id_var() 1441 self._match(TokenType.PARAMETER) 1442 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1443 1444 if not user or not host: 1445 return None 1446 1447 return exp.DefinerProperty(this=f"{user}@{host}") 1448 1449 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1450 self._match(TokenType.TABLE) 1451 self._match(TokenType.EQ) 1452 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1453 1454 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1455 return self.expression(exp.LogProperty, no=no) 1456 1457 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1458 return self.expression(exp.JournalProperty, **kwargs) 1459 1460 def _parse_checksum(self) -> exp.ChecksumProperty: 1461 self._match(TokenType.EQ) 1462 1463 on = None 1464 if self._match(TokenType.ON): 1465 on = True 1466 elif self._match_text_seq("OFF"): 1467 on = False 1468 1469 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1470 1471 def _parse_cluster(self) -> exp.Cluster: 1472 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1473 1474 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1475 self._match_text_seq("BY") 1476 1477 self._match_l_paren() 1478 expressions = self._parse_csv(self._parse_column) 1479 self._match_r_paren() 1480 1481 if self._match_text_seq("SORTED", "BY"): 1482 self._match_l_paren() 1483 sorted_by = self._parse_csv(self._parse_ordered) 1484 self._match_r_paren() 1485 else: 1486 sorted_by = None 1487 1488 self._match(TokenType.INTO) 1489 buckets = self._parse_number() 1490 self._match_text_seq("BUCKETS") 1491 1492 return self.expression( 1493 exp.ClusteredByProperty, 1494 expressions=expressions, 1495 sorted_by=sorted_by, 1496 buckets=buckets, 1497 ) 1498 1499 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1500 if not self._match_text_seq("GRANTS"): 1501 self._retreat(self._index - 1) 1502 return None 1503 1504 return self.expression(exp.CopyGrantsProperty) 1505 1506 def _parse_freespace(self) -> exp.FreespaceProperty: 1507 self._match(TokenType.EQ) 1508 return self.expression( 1509 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1510 ) 1511 1512 def _parse_mergeblockratio( 1513 self, no: bool = False, default: bool = False 1514 ) -> exp.MergeBlockRatioProperty: 1515 if self._match(TokenType.EQ): 1516 return self.expression( 1517 exp.MergeBlockRatioProperty, 1518 this=self._parse_number(), 1519 percent=self._match(TokenType.PERCENT), 1520 ) 1521 1522 return self.expression(exp.MergeBlockRatioProperty, no=no, 
default=default) 1523 1524 def _parse_datablocksize( 1525 self, 1526 default: t.Optional[bool] = None, 1527 minimum: t.Optional[bool] = None, 1528 maximum: t.Optional[bool] = None, 1529 ) -> exp.DataBlocksizeProperty: 1530 self._match(TokenType.EQ) 1531 size = self._parse_number() 1532 1533 units = None 1534 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1535 units = self._prev.text 1536 1537 return self.expression( 1538 exp.DataBlocksizeProperty, 1539 size=size, 1540 units=units, 1541 default=default, 1542 minimum=minimum, 1543 maximum=maximum, 1544 ) 1545 1546 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1547 self._match(TokenType.EQ) 1548 always = self._match_text_seq("ALWAYS") 1549 manual = self._match_text_seq("MANUAL") 1550 never = self._match_text_seq("NEVER") 1551 default = self._match_text_seq("DEFAULT") 1552 1553 autotemp = None 1554 if self._match_text_seq("AUTOTEMP"): 1555 autotemp = self._parse_schema() 1556 1557 return self.expression( 1558 exp.BlockCompressionProperty, 1559 always=always, 1560 manual=manual, 1561 never=never, 1562 default=default, 1563 autotemp=autotemp, 1564 ) 1565 1566 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1567 no = self._match_text_seq("NO") 1568 concurrent = self._match_text_seq("CONCURRENT") 1569 self._match_text_seq("ISOLATED", "LOADING") 1570 for_all = self._match_text_seq("FOR", "ALL") 1571 for_insert = self._match_text_seq("FOR", "INSERT") 1572 for_none = self._match_text_seq("FOR", "NONE") 1573 return self.expression( 1574 exp.IsolatedLoadingProperty, 1575 no=no, 1576 concurrent=concurrent, 1577 for_all=for_all, 1578 for_insert=for_insert, 1579 for_none=for_none, 1580 ) 1581 1582 def _parse_locking(self) -> exp.LockingProperty: 1583 if self._match(TokenType.TABLE): 1584 kind = "TABLE" 1585 elif self._match(TokenType.VIEW): 1586 kind = "VIEW" 1587 elif self._match(TokenType.ROW): 1588 kind = "ROW" 1589 elif self._match_text_seq("DATABASE"): 1590 kind = "DATABASE" 1591 else: 1592 kind = None 1593 1594 if kind in ("DATABASE", "TABLE", "VIEW"): 1595 this = self._parse_table_parts() 1596 else: 1597 this = None 1598 1599 if self._match(TokenType.FOR): 1600 for_or_in = "FOR" 1601 elif self._match(TokenType.IN): 1602 for_or_in = "IN" 1603 else: 1604 for_or_in = None 1605 1606 if self._match_text_seq("ACCESS"): 1607 lock_type = "ACCESS" 1608 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1609 lock_type = "EXCLUSIVE" 1610 elif self._match_text_seq("SHARE"): 1611 lock_type = "SHARE" 1612 elif self._match_text_seq("READ"): 1613 lock_type = "READ" 1614 elif self._match_text_seq("WRITE"): 1615 lock_type = "WRITE" 1616 elif self._match_text_seq("CHECKSUM"): 1617 lock_type = "CHECKSUM" 1618 else: 1619 lock_type = None 1620 1621 override = self._match_text_seq("OVERRIDE") 1622 1623 return self.expression( 1624 exp.LockingProperty, 1625 this=this, 1626 kind=kind, 1627 for_or_in=for_or_in, 1628 lock_type=lock_type, 1629 override=override, 1630 ) 1631 1632 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1633 if self._match_set(self.PARTITION_BY_TOKENS): 1634 return self._parse_csv(self._parse_conjunction) 1635 return [] 1636 1637 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1638 self._match(TokenType.EQ) 1639 return self.expression( 1640 exp.PartitionedByProperty, 1641 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1642 ) 1643 1644 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1645 if self._match_text_seq("AND", 
"STATISTICS"): 1646 statistics = True 1647 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1648 statistics = False 1649 else: 1650 statistics = None 1651 1652 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1653 1654 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1655 if self._match_text_seq("PRIMARY", "INDEX"): 1656 return exp.NoPrimaryIndexProperty() 1657 return None 1658 1659 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1660 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1661 return exp.OnCommitProperty() 1662 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1663 return exp.OnCommitProperty(delete=True) 1664 return None 1665 1666 def _parse_distkey(self) -> exp.DistKeyProperty: 1667 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1668 1669 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1670 table = self._parse_table(schema=True) 1671 1672 options = [] 1673 while self._match_texts(("INCLUDING", "EXCLUDING")): 1674 this = self._prev.text.upper() 1675 1676 id_var = self._parse_id_var() 1677 if not id_var: 1678 return None 1679 1680 options.append( 1681 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1682 ) 1683 1684 return self.expression(exp.LikeProperty, this=table, expressions=options) 1685 1686 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1687 return self.expression( 1688 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1689 ) 1690 1691 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1692 self._match(TokenType.EQ) 1693 return self.expression( 1694 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1695 ) 1696 1697 def _parse_returns(self) -> exp.ReturnsProperty: 1698 value: t.Optional[exp.Expression] 1699 is_table = self._match(TokenType.TABLE) 1700 1701 if is_table: 1702 if self._match(TokenType.LT): 1703 value = self.expression( 1704 exp.Schema, 1705 this="TABLE", 1706 expressions=self._parse_csv(self._parse_struct_types), 1707 ) 1708 if not self._match(TokenType.GT): 1709 self.raise_error("Expecting >") 1710 else: 1711 value = self._parse_schema(exp.var("TABLE")) 1712 else: 1713 value = self._parse_types() 1714 1715 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1716 1717 def _parse_describe(self) -> exp.Describe: 1718 kind = self._match_set(self.CREATABLES) and self._prev.text 1719 this = self._parse_table() 1720 return self.expression(exp.Describe, this=this, kind=kind) 1721 1722 def _parse_insert(self) -> exp.Insert: 1723 comments = ensure_list(self._prev_comments) 1724 overwrite = self._match(TokenType.OVERWRITE) 1725 ignore = self._match(TokenType.IGNORE) 1726 local = self._match_text_seq("LOCAL") 1727 alternative = None 1728 1729 if self._match_text_seq("DIRECTORY"): 1730 this: t.Optional[exp.Expression] = self.expression( 1731 exp.Directory, 1732 this=self._parse_var_or_string(), 1733 local=local, 1734 row_format=self._parse_row_format(match_row=True), 1735 ) 1736 else: 1737 if self._match(TokenType.OR): 1738 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1739 1740 self._match(TokenType.INTO) 1741 comments += ensure_list(self._prev_comments) 1742 self._match(TokenType.TABLE) 1743 this = self._parse_table(schema=True) 1744 1745 returning = self._parse_returning() 1746 1747 return self.expression( 1748 exp.Insert, 1749 comments=comments, 
1750 this=this, 1751 exists=self._parse_exists(), 1752 partition=self._parse_partition(), 1753 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1754 and self._parse_conjunction(), 1755 expression=self._parse_ddl_select(), 1756 conflict=self._parse_on_conflict(), 1757 returning=returning or self._parse_returning(), 1758 overwrite=overwrite, 1759 alternative=alternative, 1760 ignore=ignore, 1761 ) 1762 1763 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1764 conflict = self._match_text_seq("ON", "CONFLICT") 1765 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1766 1767 if not conflict and not duplicate: 1768 return None 1769 1770 nothing = None 1771 expressions = None 1772 key = None 1773 constraint = None 1774 1775 if conflict: 1776 if self._match_text_seq("ON", "CONSTRAINT"): 1777 constraint = self._parse_id_var() 1778 else: 1779 key = self._parse_csv(self._parse_value) 1780 1781 self._match_text_seq("DO") 1782 if self._match_text_seq("NOTHING"): 1783 nothing = True 1784 else: 1785 self._match(TokenType.UPDATE) 1786 self._match(TokenType.SET) 1787 expressions = self._parse_csv(self._parse_equality) 1788 1789 return self.expression( 1790 exp.OnConflict, 1791 duplicate=duplicate, 1792 expressions=expressions, 1793 nothing=nothing, 1794 key=key, 1795 constraint=constraint, 1796 ) 1797 1798 def _parse_returning(self) -> t.Optional[exp.Returning]: 1799 if not self._match(TokenType.RETURNING): 1800 return None 1801 return self.expression( 1802 exp.Returning, 1803 expressions=self._parse_csv(self._parse_expression), 1804 into=self._match(TokenType.INTO) and self._parse_table_part(), 1805 ) 1806 1807 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1808 if not self._match(TokenType.FORMAT): 1809 return None 1810 return self._parse_row_format() 1811 1812 def _parse_row_format( 1813 self, match_row: bool = False 1814 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1815 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1816 return None 1817 1818 if self._match_text_seq("SERDE"): 1819 this = self._parse_string() 1820 1821 serde_properties = None 1822 if self._match(TokenType.SERDE_PROPERTIES): 1823 serde_properties = self.expression( 1824 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1825 ) 1826 1827 return self.expression( 1828 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1829 ) 1830 1831 self._match_text_seq("DELIMITED") 1832 1833 kwargs = {} 1834 1835 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1836 kwargs["fields"] = self._parse_string() 1837 if self._match_text_seq("ESCAPED", "BY"): 1838 kwargs["escaped"] = self._parse_string() 1839 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1840 kwargs["collection_items"] = self._parse_string() 1841 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1842 kwargs["map_keys"] = self._parse_string() 1843 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1844 kwargs["lines"] = self._parse_string() 1845 if self._match_text_seq("NULL", "DEFINED", "AS"): 1846 kwargs["null"] = self._parse_string() 1847 1848 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1849 1850 def _parse_load(self) -> exp.LoadData | exp.Command: 1851 if self._match_text_seq("DATA"): 1852 local = self._match_text_seq("LOCAL") 1853 self._match_text_seq("INPATH") 1854 inpath = self._parse_string() 1855 overwrite = 
self._match(TokenType.OVERWRITE) 1856 self._match_pair(TokenType.INTO, TokenType.TABLE) 1857 1858 return self.expression( 1859 exp.LoadData, 1860 this=self._parse_table(schema=True), 1861 local=local, 1862 overwrite=overwrite, 1863 inpath=inpath, 1864 partition=self._parse_partition(), 1865 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1866 serde=self._match_text_seq("SERDE") and self._parse_string(), 1867 ) 1868 return self._parse_as_command(self._prev) 1869 1870 def _parse_delete(self) -> exp.Delete: 1871 # This handles MySQL's "Multiple-Table Syntax" 1872 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1873 tables = None 1874 comments = self._prev_comments 1875 if not self._match(TokenType.FROM, advance=False): 1876 tables = self._parse_csv(self._parse_table) or None 1877 1878 returning = self._parse_returning() 1879 1880 return self.expression( 1881 exp.Delete, 1882 comments=comments, 1883 tables=tables, 1884 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1885 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1886 where=self._parse_where(), 1887 returning=returning or self._parse_returning(), 1888 limit=self._parse_limit(), 1889 ) 1890 1891 def _parse_update(self) -> exp.Update: 1892 comments = self._prev_comments 1893 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1894 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1895 returning = self._parse_returning() 1896 return self.expression( 1897 exp.Update, 1898 comments=comments, 1899 **{ # type: ignore 1900 "this": this, 1901 "expressions": expressions, 1902 "from": self._parse_from(joins=True), 1903 "where": self._parse_where(), 1904 "returning": returning or self._parse_returning(), 1905 "limit": self._parse_limit(), 1906 }, 1907 ) 1908 1909 def _parse_uncache(self) -> exp.Uncache: 1910 if not self._match(TokenType.TABLE): 1911 self.raise_error("Expecting TABLE after UNCACHE") 1912 1913 return self.expression( 1914 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1915 ) 1916 1917 def _parse_cache(self) -> exp.Cache: 1918 lazy = self._match_text_seq("LAZY") 1919 self._match(TokenType.TABLE) 1920 table = self._parse_table(schema=True) 1921 1922 options = [] 1923 if self._match_text_seq("OPTIONS"): 1924 self._match_l_paren() 1925 k = self._parse_string() 1926 self._match(TokenType.EQ) 1927 v = self._parse_string() 1928 options = [k, v] 1929 self._match_r_paren() 1930 1931 self._match(TokenType.ALIAS) 1932 return self.expression( 1933 exp.Cache, 1934 this=table, 1935 lazy=lazy, 1936 options=options, 1937 expression=self._parse_select(nested=True), 1938 ) 1939 1940 def _parse_partition(self) -> t.Optional[exp.Partition]: 1941 if not self._match(TokenType.PARTITION): 1942 return None 1943 1944 return self.expression( 1945 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1946 ) 1947 1948 def _parse_value(self) -> exp.Tuple: 1949 if self._match(TokenType.L_PAREN): 1950 expressions = self._parse_csv(self._parse_conjunction) 1951 self._match_r_paren() 1952 return self.expression(exp.Tuple, expressions=expressions) 1953 1954 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
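        # e.g. (editorial illustration, not in the original source):
        # "VALUES 1, 2" becomes two one-column rows,
        # exp.Tuple(expressions=[exp.Literal.number(1)]) and
        # exp.Tuple(expressions=[exp.Literal.number(2)]).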
1955 # https://prestodb.io/docs/current/sql/values.html 1956 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1957 1958 def _parse_projections(self) -> t.List[t.Optional[exp.Expression]]: 1959 return self._parse_expressions() 1960 1961 def _parse_select( 1962 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1963 ) -> t.Optional[exp.Expression]: 1964 cte = self._parse_with() 1965 if cte: 1966 this = self._parse_statement() 1967 1968 if not this: 1969 self.raise_error("Failed to parse any statement following CTE") 1970 return cte 1971 1972 if "with" in this.arg_types: 1973 this.set("with", cte) 1974 else: 1975 self.raise_error(f"{this.key} does not support CTE") 1976 this = cte 1977 elif self._match(TokenType.SELECT): 1978 comments = self._prev_comments 1979 1980 hint = self._parse_hint() 1981 all_ = self._match(TokenType.ALL) 1982 distinct = self._match(TokenType.DISTINCT) 1983 1984 kind = ( 1985 self._match(TokenType.ALIAS) 1986 and self._match_texts(("STRUCT", "VALUE")) 1987 and self._prev.text 1988 ) 1989 1990 if distinct: 1991 distinct = self.expression( 1992 exp.Distinct, 1993 on=self._parse_value() if self._match(TokenType.ON) else None, 1994 ) 1995 1996 if all_ and distinct: 1997 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1998 1999 limit = self._parse_limit(top=True) 2000 projections = self._parse_projections() 2001 2002 this = self.expression( 2003 exp.Select, 2004 kind=kind, 2005 hint=hint, 2006 distinct=distinct, 2007 expressions=projections, 2008 limit=limit, 2009 ) 2010 this.comments = comments 2011 2012 into = self._parse_into() 2013 if into: 2014 this.set("into", into) 2015 2016 from_ = self._parse_from() 2017 if from_: 2018 this.set("from", from_) 2019 2020 this = self._parse_query_modifiers(this) 2021 elif (table or nested) and self._match(TokenType.L_PAREN): 2022 if self._match(TokenType.PIVOT): 2023 this = self._parse_simplified_pivot() 2024 elif self._match(TokenType.FROM): 2025 this = exp.select("*").from_( 2026 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2027 ) 2028 else: 2029 this = self._parse_table() if table else self._parse_select(nested=True) 2030 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2031 2032 self._match_r_paren() 2033 2034 # We return early here so that the UNION isn't attached to the subquery by the 2035 # following call to _parse_set_operations, but instead becomes the parent node 2036 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2037 elif self._match(TokenType.VALUES): 2038 this = self.expression( 2039 exp.Values, 2040 expressions=self._parse_csv(self._parse_value), 2041 alias=self._parse_table_alias(), 2042 ) 2043 else: 2044 this = None 2045 2046 return self._parse_set_operations(this) 2047 2048 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2049 if not skip_with_token and not self._match(TokenType.WITH): 2050 return None 2051 2052 comments = self._prev_comments 2053 recursive = self._match(TokenType.RECURSIVE) 2054 2055 expressions = [] 2056 while True: 2057 expressions.append(self._parse_cte()) 2058 2059 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2060 break 2061 else: 2062 self._match(TokenType.WITH) 2063 2064 return self.expression( 2065 exp.With, comments=comments, expressions=expressions, recursive=recursive 2066 ) 2067 2068 def _parse_cte(self) -> exp.CTE: 2069 alias = self._parse_table_alias() 2070 if not alias or not alias.this: 2071 
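            # Editorial note, not in the original source: a CTE must be named,
            # so e.g. "WITH (SELECT 1) SELECT 1" yields no alias and fails here
            # with "Expected CTE to have alias".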
self.raise_error("Expected CTE to have alias") 2072 2073 self._match(TokenType.ALIAS) 2074 return self.expression( 2075 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2076 ) 2077 2078 def _parse_table_alias( 2079 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2080 ) -> t.Optional[exp.TableAlias]: 2081 any_token = self._match(TokenType.ALIAS) 2082 alias = ( 2083 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2084 or self._parse_string_as_identifier() 2085 ) 2086 2087 index = self._index 2088 if self._match(TokenType.L_PAREN): 2089 columns = self._parse_csv(self._parse_function_parameter) 2090 self._match_r_paren() if columns else self._retreat(index) 2091 else: 2092 columns = None 2093 2094 if not alias and not columns: 2095 return None 2096 2097 return self.expression(exp.TableAlias, this=alias, columns=columns) 2098 2099 def _parse_subquery( 2100 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2101 ) -> t.Optional[exp.Subquery]: 2102 if not this: 2103 return None 2104 2105 return self.expression( 2106 exp.Subquery, 2107 this=this, 2108 pivots=self._parse_pivots(), 2109 alias=self._parse_table_alias() if parse_alias else None, 2110 ) 2111 2112 def _parse_query_modifiers( 2113 self, this: t.Optional[exp.Expression] 2114 ) -> t.Optional[exp.Expression]: 2115 if isinstance(this, self.MODIFIABLES): 2116 for join in iter(self._parse_join, None): 2117 this.append("joins", join) 2118 for lateral in iter(self._parse_lateral, None): 2119 this.append("laterals", lateral) 2120 2121 while True: 2122 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2123 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2124 key, expression = parser(self) 2125 2126 if expression: 2127 this.set(key, expression) 2128 if key == "limit": 2129 offset = expression.args.pop("offset", None) 2130 if offset: 2131 this.set("offset", exp.Offset(expression=offset)) 2132 continue 2133 break 2134 return this 2135 2136 def _parse_hint(self) -> t.Optional[exp.Hint]: 2137 if self._match(TokenType.HINT): 2138 hints = [] 2139 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2140 hints.extend(hint) 2141 2142 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2143 self.raise_error("Expected */ after HINT") 2144 2145 return self.expression(exp.Hint, expressions=hints) 2146 2147 return None 2148 2149 def _parse_into(self) -> t.Optional[exp.Into]: 2150 if not self._match(TokenType.INTO): 2151 return None 2152 2153 temp = self._match(TokenType.TEMPORARY) 2154 unlogged = self._match_text_seq("UNLOGGED") 2155 self._match(TokenType.TABLE) 2156 2157 return self.expression( 2158 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2159 ) 2160 2161 def _parse_from( 2162 self, joins: bool = False, skip_from_token: bool = False 2163 ) -> t.Optional[exp.From]: 2164 if not skip_from_token and not self._match(TokenType.FROM): 2165 return None 2166 2167 return self.expression( 2168 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2169 ) 2170 2171 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2172 if not self._match(TokenType.MATCH_RECOGNIZE): 2173 return None 2174 2175 self._match_l_paren() 2176 2177 partition = self._parse_partition_by() 2178 order = self._parse_order() 2179 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2180 2181 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2182 rows = 
exp.var("ONE ROW PER MATCH") 2183 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2184 text = "ALL ROWS PER MATCH" 2185 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2186 text += f" SHOW EMPTY MATCHES" 2187 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2188 text += f" OMIT EMPTY MATCHES" 2189 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2190 text += f" WITH UNMATCHED ROWS" 2191 rows = exp.var(text) 2192 else: 2193 rows = None 2194 2195 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2196 text = "AFTER MATCH SKIP" 2197 if self._match_text_seq("PAST", "LAST", "ROW"): 2198 text += f" PAST LAST ROW" 2199 elif self._match_text_seq("TO", "NEXT", "ROW"): 2200 text += f" TO NEXT ROW" 2201 elif self._match_text_seq("TO", "FIRST"): 2202 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2203 elif self._match_text_seq("TO", "LAST"): 2204 text += f" TO LAST {self._advance_any().text}" # type: ignore 2205 after = exp.var(text) 2206 else: 2207 after = None 2208 2209 if self._match_text_seq("PATTERN"): 2210 self._match_l_paren() 2211 2212 if not self._curr: 2213 self.raise_error("Expecting )", self._curr) 2214 2215 paren = 1 2216 start = self._curr 2217 2218 while self._curr and paren > 0: 2219 if self._curr.token_type == TokenType.L_PAREN: 2220 paren += 1 2221 if self._curr.token_type == TokenType.R_PAREN: 2222 paren -= 1 2223 2224 end = self._prev 2225 self._advance() 2226 2227 if paren > 0: 2228 self.raise_error("Expecting )", self._curr) 2229 2230 pattern = exp.var(self._find_sql(start, end)) 2231 else: 2232 pattern = None 2233 2234 define = ( 2235 self._parse_csv( 2236 lambda: self.expression( 2237 exp.Alias, 2238 alias=self._parse_id_var(any_token=True), 2239 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2240 ) 2241 ) 2242 if self._match_text_seq("DEFINE") 2243 else None 2244 ) 2245 2246 self._match_r_paren() 2247 2248 return self.expression( 2249 exp.MatchRecognize, 2250 partition_by=partition, 2251 order=order, 2252 measures=measures, 2253 rows=rows, 2254 after=after, 2255 pattern=pattern, 2256 define=define, 2257 alias=self._parse_table_alias(), 2258 ) 2259 2260 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2261 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2262 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2263 2264 if outer_apply or cross_apply: 2265 this = self._parse_select(table=True) 2266 view = None 2267 outer = not cross_apply 2268 elif self._match(TokenType.LATERAL): 2269 this = self._parse_select(table=True) 2270 view = self._match(TokenType.VIEW) 2271 outer = self._match(TokenType.OUTER) 2272 else: 2273 return None 2274 2275 if not this: 2276 this = ( 2277 self._parse_unnest() 2278 or self._parse_function() 2279 or self._parse_id_var(any_token=False) 2280 ) 2281 2282 while self._match(TokenType.DOT): 2283 this = exp.Dot( 2284 this=this, 2285 expression=self._parse_function() or self._parse_id_var(any_token=False), 2286 ) 2287 2288 if view: 2289 table = self._parse_id_var(any_token=False) 2290 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2291 table_alias: t.Optional[exp.TableAlias] = self.expression( 2292 exp.TableAlias, this=table, columns=columns 2293 ) 2294 elif isinstance(this, exp.Subquery) and this.alias: 2295 # Ensures parity between the Subquery's and the Lateral's "alias" args 2296 table_alias = this.args["alias"].copy() 2297 else: 2298 table_alias = self._parse_table_alias() 2299 2300 return self.expression(exp.Lateral, 
this=this, view=view, outer=outer, alias=table_alias) 2301 2302 def _parse_join_parts( 2303 self, 2304 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2305 return ( 2306 self._match_set(self.JOIN_METHODS) and self._prev, 2307 self._match_set(self.JOIN_SIDES) and self._prev, 2308 self._match_set(self.JOIN_KINDS) and self._prev, 2309 ) 2310 2311 def _parse_join( 2312 self, skip_join_token: bool = False, parse_bracket: bool = False 2313 ) -> t.Optional[exp.Join]: 2314 if self._match(TokenType.COMMA): 2315 return self.expression(exp.Join, this=self._parse_table()) 2316 2317 index = self._index 2318 method, side, kind = self._parse_join_parts() 2319 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2320 join = self._match(TokenType.JOIN) 2321 2322 if not skip_join_token and not join: 2323 self._retreat(index) 2324 kind = None 2325 method = None 2326 side = None 2327 2328 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2329 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2330 2331 if not skip_join_token and not join and not outer_apply and not cross_apply: 2332 return None 2333 2334 if outer_apply: 2335 side = Token(TokenType.LEFT, "LEFT") 2336 2337 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2338 2339 if method: 2340 kwargs["method"] = method.text 2341 if side: 2342 kwargs["side"] = side.text 2343 if kind: 2344 kwargs["kind"] = kind.text 2345 if hint: 2346 kwargs["hint"] = hint 2347 2348 if self._match(TokenType.ON): 2349 kwargs["on"] = self._parse_conjunction() 2350 elif self._match(TokenType.USING): 2351 kwargs["using"] = self._parse_wrapped_id_vars() 2352 elif not (kind and kind.token_type == TokenType.CROSS): 2353 index = self._index 2354 joins = self._parse_joins() 2355 2356 if joins and self._match(TokenType.ON): 2357 kwargs["on"] = self._parse_conjunction() 2358 elif joins and self._match(TokenType.USING): 2359 kwargs["using"] = self._parse_wrapped_id_vars() 2360 else: 2361 joins = None 2362 self._retreat(index) 2363 2364 kwargs["this"].set("joins", joins) 2365 2366 comments = [c for token in (method, side, kind) if token for c in token.comments] 2367 return self.expression(exp.Join, comments=comments, **kwargs) 2368 2369 def _parse_index( 2370 self, 2371 index: t.Optional[exp.Expression] = None, 2372 ) -> t.Optional[exp.Index]: 2373 if index: 2374 unique = None 2375 primary = None 2376 amp = None 2377 2378 self._match(TokenType.ON) 2379 self._match(TokenType.TABLE) # hive 2380 table = self._parse_table_parts(schema=True) 2381 else: 2382 unique = self._match(TokenType.UNIQUE) 2383 primary = self._match_text_seq("PRIMARY") 2384 amp = self._match_text_seq("AMP") 2385 2386 if not self._match(TokenType.INDEX): 2387 return None 2388 2389 index = self._parse_id_var() 2390 table = None 2391 2392 using = self._parse_field() if self._match(TokenType.USING) else None 2393 2394 if self._match(TokenType.L_PAREN, advance=False): 2395 columns = self._parse_wrapped_csv(self._parse_ordered) 2396 else: 2397 columns = None 2398 2399 return self.expression( 2400 exp.Index, 2401 this=index, 2402 table=table, 2403 using=using, 2404 columns=columns, 2405 unique=unique, 2406 primary=primary, 2407 amp=amp, 2408 partition_by=self._parse_partition_by(), 2409 ) 2410 2411 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2412 hints: t.List[exp.Expression] = [] 2413 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2414 # 
https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2415 hints.append( 2416 self.expression( 2417 exp.WithTableHint, 2418 expressions=self._parse_csv( 2419 lambda: self._parse_function() or self._parse_var(any_token=True) 2420 ), 2421 ) 2422 ) 2423 self._match_r_paren() 2424 else: 2425 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2426 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2427 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2428 2429 self._match_texts({"INDEX", "KEY"}) 2430 if self._match(TokenType.FOR): 2431 hint.set("target", self._advance_any() and self._prev.text.upper()) 2432 2433 hint.set("expressions", self._parse_wrapped_id_vars()) 2434 hints.append(hint) 2435 2436 return hints or None 2437 2438 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2439 return ( 2440 (not schema and self._parse_function(optional_parens=False)) 2441 or self._parse_id_var(any_token=False) 2442 or self._parse_string_as_identifier() 2443 or self._parse_placeholder() 2444 ) 2445 2446 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2447 catalog = None 2448 db = None 2449 table = self._parse_table_part(schema=schema) 2450 2451 while self._match(TokenType.DOT): 2452 if catalog: 2453 # This allows nesting the table in arbitrarily many dot expressions if needed 2454 table = self.expression( 2455 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2456 ) 2457 else: 2458 catalog = db 2459 db = table 2460 table = self._parse_table_part(schema=schema) 2461 2462 if not table: 2463 self.raise_error(f"Expected table name but got {self._curr}") 2464 2465 return self.expression( 2466 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2467 ) 2468 2469 def _parse_table( 2470 self, 2471 schema: bool = False, 2472 joins: bool = False, 2473 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2474 parse_bracket: bool = False, 2475 ) -> t.Optional[exp.Expression]: 2476 lateral = self._parse_lateral() 2477 if lateral: 2478 return lateral 2479 2480 unnest = self._parse_unnest() 2481 if unnest: 2482 return unnest 2483 2484 values = self._parse_derived_table_values() 2485 if values: 2486 return values 2487 2488 subquery = self._parse_select(table=True) 2489 if subquery: 2490 if not subquery.args.get("pivots"): 2491 subquery.set("pivots", self._parse_pivots()) 2492 return subquery 2493 2494 bracket = parse_bracket and self._parse_bracket(None) 2495 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2496 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2497 2498 if schema: 2499 return self._parse_schema(this=this) 2500 2501 if self.ALIAS_POST_TABLESAMPLE: 2502 table_sample = self._parse_table_sample() 2503 2504 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2505 if alias: 2506 this.set("alias", alias) 2507 2508 if not this.args.get("pivots"): 2509 this.set("pivots", self._parse_pivots()) 2510 2511 this.set("hints", self._parse_table_hints()) 2512 2513 if not self.ALIAS_POST_TABLESAMPLE: 2514 table_sample = self._parse_table_sample() 2515 2516 if table_sample: 2517 table_sample.set("this", this) 2518 this = table_sample 2519 2520 if joins: 2521 for join in iter(self._parse_join, None): 2522 this.append("joins", join) 2523 2524 return this 2525 2526 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2527 if not self._match(TokenType.UNNEST): 2528 
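# not an UNNEST(...) call here, so let _parse_table fall through to the other table forms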
return None 2529 2530 expressions = self._parse_wrapped_csv(self._parse_type) 2531 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2532 2533 alias = self._parse_table_alias() if with_alias else None 2534 2535 if alias and self.UNNEST_COLUMN_ONLY: 2536 if alias.args.get("columns"): 2537 self.raise_error("Unexpected extra column alias in unnest.") 2538 2539 alias.set("columns", [alias.this]) 2540 alias.set("this", None) 2541 2542 offset = None 2543 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2544 self._match(TokenType.ALIAS) 2545 offset = self._parse_id_var() or exp.to_identifier("offset") 2546 2547 return self.expression( 2548 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2549 ) 2550 2551 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2552 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2553 if not is_derived and not self._match(TokenType.VALUES): 2554 return None 2555 2556 expressions = self._parse_csv(self._parse_value) 2557 alias = self._parse_table_alias() 2558 2559 if is_derived: 2560 self._match_r_paren() 2561 2562 return self.expression( 2563 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2564 ) 2565 2566 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2567 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2568 as_modifier and self._match_text_seq("USING", "SAMPLE") 2569 ): 2570 return None 2571 2572 bucket_numerator = None 2573 bucket_denominator = None 2574 bucket_field = None 2575 percent = None 2576 rows = None 2577 size = None 2578 seed = None 2579 2580 kind = ( 2581 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2582 ) 2583 method = self._parse_var(tokens=(TokenType.ROW,)) 2584 2585 self._match(TokenType.L_PAREN) 2586 2587 num = self._parse_number() 2588 2589 if self._match_text_seq("BUCKET"): 2590 bucket_numerator = self._parse_number() 2591 self._match_text_seq("OUT", "OF") 2592 bucket_denominator = self._parse_number() 2593 self._match(TokenType.ON) 2594 bucket_field = self._parse_field() 2595 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2596 percent = num 2597 elif self._match(TokenType.ROWS): 2598 rows = num 2599 else: 2600 size = num 2601 2602 self._match(TokenType.R_PAREN) 2603 2604 if self._match(TokenType.L_PAREN): 2605 method = self._parse_var() 2606 seed = self._match(TokenType.COMMA) and self._parse_number() 2607 self._match_r_paren() 2608 elif self._match_texts(("SEED", "REPEATABLE")): 2609 seed = self._parse_wrapped(self._parse_number) 2610 2611 return self.expression( 2612 exp.TableSample, 2613 method=method, 2614 bucket_numerator=bucket_numerator, 2615 bucket_denominator=bucket_denominator, 2616 bucket_field=bucket_field, 2617 percent=percent, 2618 rows=rows, 2619 size=size, 2620 seed=seed, 2621 kind=kind, 2622 ) 2623 2624 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2625 return list(iter(self._parse_pivot, None)) or None 2626 2627 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2628 return list(iter(self._parse_join, None)) or None 2629 2630 # https://duckdb.org/docs/sql/statements/pivot 2631 def _parse_simplified_pivot(self) -> exp.Pivot: 2632 def _parse_on() -> t.Optional[exp.Expression]: 2633 this = self._parse_bitwise() 2634 return self._parse_in(this) if self._match(TokenType.IN) else this 2635 2636 this = self._parse_table() 2637 expressions = self._match(TokenType.ON) and
self._parse_csv(_parse_on) 2638 using = self._match(TokenType.USING) and self._parse_csv( 2639 lambda: self._parse_alias(self._parse_function()) 2640 ) 2641 group = self._parse_group() 2642 return self.expression( 2643 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2644 ) 2645 2646 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2647 index = self._index 2648 include_nulls = None 2649 2650 if self._match(TokenType.PIVOT): 2651 unpivot = False 2652 elif self._match(TokenType.UNPIVOT): 2653 unpivot = True 2654 2655 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2656 if self._match_text_seq("INCLUDE", "NULLS"): 2657 include_nulls = True 2658 elif self._match_text_seq("EXCLUDE", "NULLS"): 2659 include_nulls = False 2660 else: 2661 return None 2662 2663 expressions = [] 2664 field = None 2665 2666 if not self._match(TokenType.L_PAREN): 2667 self._retreat(index) 2668 return None 2669 2670 if unpivot: 2671 expressions = self._parse_csv(self._parse_column) 2672 else: 2673 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2674 2675 if not expressions: 2676 self.raise_error("Failed to parse PIVOT's aggregation list") 2677 2678 if not self._match(TokenType.FOR): 2679 self.raise_error("Expecting FOR") 2680 2681 value = self._parse_column() 2682 2683 if not self._match(TokenType.IN): 2684 self.raise_error("Expecting IN") 2685 2686 field = self._parse_in(value, alias=True) 2687 2688 self._match_r_paren() 2689 2690 pivot = self.expression( 2691 exp.Pivot, 2692 expressions=expressions, 2693 field=field, 2694 unpivot=unpivot, 2695 include_nulls=include_nulls, 2696 ) 2697 2698 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2699 pivot.set("alias", self._parse_table_alias()) 2700 2701 if not unpivot: 2702 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2703 2704 columns: t.List[exp.Expression] = [] 2705 for fld in pivot.args["field"].expressions: 2706 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2707 for name in names: 2708 if self.PREFIXED_PIVOT_COLUMNS: 2709 name = f"{name}_{field_name}" if name else field_name 2710 else: 2711 name = f"{field_name}_{name}" if name else field_name 2712 2713 columns.append(exp.to_identifier(name)) 2714 2715 pivot.set("columns", columns) 2716 2717 return pivot 2718 2719 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2720 return [agg.alias for agg in aggregations] 2721 2722 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2723 if not skip_where_token and not self._match(TokenType.WHERE): 2724 return None 2725 2726 return self.expression( 2727 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2728 ) 2729 2730 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2731 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2732 return None 2733 2734 elements = defaultdict(list) 2735 2736 if self._match(TokenType.ALL): 2737 return self.expression(exp.Group, all=True) 2738 2739 while True: 2740 expressions = self._parse_csv(self._parse_conjunction) 2741 if expressions: 2742 elements["expressions"].extend(expressions) 2743 2744 grouping_sets = self._parse_grouping_sets() 2745 if grouping_sets: 2746 elements["grouping_sets"].extend(grouping_sets) 2747 2748 rollup = None 2749 cube = None 2750 totals = None 2751 2752 with_ = self._match(TokenType.WITH) 2753 
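# If WITH was just consumed, ROLLUP / CUBE is the MySQL-style trailing modifier
# (e.g. GROUP BY a, b WITH ROLLUP), so True is recorded instead of a wrapped column list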
if self._match(TokenType.ROLLUP): 2754 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2755 elements["rollup"].extend(ensure_list(rollup)) 2756 2757 if self._match(TokenType.CUBE): 2758 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2759 elements["cube"].extend(ensure_list(cube)) 2760 2761 if self._match_text_seq("TOTALS"): 2762 totals = True 2763 elements["totals"] = True # type: ignore 2764 2765 if not (grouping_sets or rollup or cube or totals): 2766 break 2767 2768 return self.expression(exp.Group, **elements) # type: ignore 2769 2770 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2771 if not self._match(TokenType.GROUPING_SETS): 2772 return None 2773 2774 return self._parse_wrapped_csv(self._parse_grouping_set) 2775 2776 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2777 if self._match(TokenType.L_PAREN): 2778 grouping_set = self._parse_csv(self._parse_column) 2779 self._match_r_paren() 2780 return self.expression(exp.Tuple, expressions=grouping_set) 2781 2782 return self._parse_column() 2783 2784 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2785 if not skip_having_token and not self._match(TokenType.HAVING): 2786 return None 2787 return self.expression(exp.Having, this=self._parse_conjunction()) 2788 2789 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2790 if not self._match(TokenType.QUALIFY): 2791 return None 2792 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2793 2794 def _parse_order( 2795 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2796 ) -> t.Optional[exp.Expression]: 2797 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2798 return this 2799 2800 return self.expression( 2801 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2802 ) 2803 2804 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2805 if not self._match(token): 2806 return None 2807 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2808 2809 def _parse_ordered(self) -> exp.Ordered: 2810 this = self._parse_conjunction() 2811 self._match(TokenType.ASC) 2812 2813 is_desc = self._match(TokenType.DESC) 2814 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2815 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2816 desc = is_desc or False 2817 asc = not desc 2818 nulls_first = is_nulls_first or False 2819 explicitly_null_ordered = is_nulls_first or is_nulls_last 2820 2821 if ( 2822 not explicitly_null_ordered 2823 and ( 2824 (asc and self.NULL_ORDERING == "nulls_are_small") 2825 or (desc and self.NULL_ORDERING != "nulls_are_small") 2826 ) 2827 and self.NULL_ORDERING != "nulls_are_last" 2828 ): 2829 nulls_first = True 2830 2831 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2832 2833 def _parse_limit( 2834 self, this: t.Optional[exp.Expression] = None, top: bool = False 2835 ) -> t.Optional[exp.Expression]: 2836 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2837 comments = self._prev_comments 2838 if top: 2839 limit_paren = self._match(TokenType.L_PAREN) 2840 expression = self._parse_number() 2841 2842 if limit_paren: 2843 self._match_r_paren() 2844 else: 2845 expression = self._parse_term() 2846 2847 if self._match(TokenType.COMMA): 2848 offset = expression 2849 expression = self._parse_term() 2850 else: 2851 offset = None 2852 2853 limit_exp = self.expression( 2854 exp.Limit, this=this, 
expression=expression, offset=offset, comments=comments 2855 ) 2856 2857 return limit_exp 2858 2859 if self._match(TokenType.FETCH): 2860 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2861 direction = self._prev.text if direction else "FIRST" 2862 2863 count = self._parse_number() 2864 percent = self._match(TokenType.PERCENT) 2865 2866 self._match_set((TokenType.ROW, TokenType.ROWS)) 2867 2868 only = self._match_text_seq("ONLY") 2869 with_ties = self._match_text_seq("WITH", "TIES") 2870 2871 if only and with_ties: 2872 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2873 2874 return self.expression( 2875 exp.Fetch, 2876 direction=direction, 2877 count=count, 2878 percent=percent, 2879 with_ties=with_ties, 2880 ) 2881 2882 return this 2883 2884 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2885 if not self._match(TokenType.OFFSET): 2886 return this 2887 2888 count = self._parse_term() 2889 self._match_set((TokenType.ROW, TokenType.ROWS)) 2890 return self.expression(exp.Offset, this=this, expression=count) 2891 2892 def _parse_locks(self) -> t.List[exp.Lock]: 2893 locks = [] 2894 while True: 2895 if self._match_text_seq("FOR", "UPDATE"): 2896 update = True 2897 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2898 "LOCK", "IN", "SHARE", "MODE" 2899 ): 2900 update = False 2901 else: 2902 break 2903 2904 expressions = None 2905 if self._match_text_seq("OF"): 2906 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2907 2908 wait: t.Optional[bool | exp.Expression] = None 2909 if self._match_text_seq("NOWAIT"): 2910 wait = True 2911 elif self._match_text_seq("WAIT"): 2912 wait = self._parse_primary() 2913 elif self._match_text_seq("SKIP", "LOCKED"): 2914 wait = False 2915 2916 locks.append( 2917 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2918 ) 2919 2920 return locks 2921 2922 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2923 if not self._match_set(self.SET_OPERATIONS): 2924 return this 2925 2926 token_type = self._prev.token_type 2927 2928 if token_type == TokenType.UNION: 2929 expression = exp.Union 2930 elif token_type == TokenType.EXCEPT: 2931 expression = exp.Except 2932 else: 2933 expression = exp.Intersect 2934 2935 return self.expression( 2936 expression, 2937 this=this, 2938 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2939 expression=self._parse_set_operations(self._parse_select(nested=True)), 2940 ) 2941 2942 def _parse_expression(self) -> t.Optional[exp.Expression]: 2943 return self._parse_alias(self._parse_conjunction()) 2944 2945 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2946 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2947 2948 def _parse_equality(self) -> t.Optional[exp.Expression]: 2949 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2950 2951 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2952 return self._parse_tokens(self._parse_range, self.COMPARISON) 2953 2954 def _parse_range(self) -> t.Optional[exp.Expression]: 2955 this = self._parse_bitwise() 2956 negate = self._match(TokenType.NOT) 2957 2958 if self._match_set(self.RANGE_PARSERS): 2959 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2960 if not expression: 2961 return this 2962 2963 this = expression 2964 elif self._match(TokenType.ISNULL): 2965 this = self.expression(exp.Is, this=this, 
expression=exp.Null()) 2966 2967 # Postgres supports ISNULL and NOTNULL for conditions. 2968 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2969 if self._match(TokenType.NOTNULL): 2970 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2971 this = self.expression(exp.Not, this=this) 2972 2973 if negate: 2974 this = self.expression(exp.Not, this=this) 2975 2976 if self._match(TokenType.IS): 2977 this = self._parse_is(this) 2978 2979 return this 2980 2981 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2982 index = self._index - 1 2983 negate = self._match(TokenType.NOT) 2984 2985 if self._match_text_seq("DISTINCT", "FROM"): 2986 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2987 return self.expression(klass, this=this, expression=self._parse_expression()) 2988 2989 expression = self._parse_null() or self._parse_boolean() 2990 if not expression: 2991 self._retreat(index) 2992 return None 2993 2994 this = self.expression(exp.Is, this=this, expression=expression) 2995 return self.expression(exp.Not, this=this) if negate else this 2996 2997 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2998 unnest = self._parse_unnest(with_alias=False) 2999 if unnest: 3000 this = self.expression(exp.In, this=this, unnest=unnest) 3001 elif self._match(TokenType.L_PAREN): 3002 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3003 3004 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3005 this = self.expression(exp.In, this=this, query=expressions[0]) 3006 else: 3007 this = self.expression(exp.In, this=this, expressions=expressions) 3008 3009 self._match_r_paren(this) 3010 else: 3011 this = self.expression(exp.In, this=this, field=self._parse_field()) 3012 3013 return this 3014 3015 def _parse_between(self, this: exp.Expression) -> exp.Between: 3016 low = self._parse_bitwise() 3017 self._match(TokenType.AND) 3018 high = self._parse_bitwise() 3019 return self.expression(exp.Between, this=this, low=low, high=high) 3020 3021 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3022 if not self._match(TokenType.ESCAPE): 3023 return this 3024 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3025 3026 def _parse_interval(self) -> t.Optional[exp.Interval]: 3027 if not self._match(TokenType.INTERVAL): 3028 return None 3029 3030 if self._match(TokenType.STRING, advance=False): 3031 this = self._parse_primary() 3032 else: 3033 this = self._parse_term() 3034 3035 unit = self._parse_function() or self._parse_var() 3036 3037 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3038 # each INTERVAL expression into this canonical form so it's easy to transpile 3039 if this and this.is_number: 3040 this = exp.Literal.string(this.name) 3041 elif this and this.is_string: 3042 parts = this.name.split() 3043 3044 if len(parts) == 2: 3045 if unit: 3046 # this is not actually a unit, it's something else 3047 unit = None 3048 self._retreat(self._index - 1) 3049 else: 3050 this = exp.Literal.string(parts[0]) 3051 unit = self.expression(exp.Var, this=parts[1]) 3052 3053 return self.expression(exp.Interval, this=this, unit=unit) 3054 3055 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3056 this = self._parse_term() 3057 3058 while True: 3059 if self._match_set(self.BITWISE): 3060 this = self.expression( 3061 self.BITWISE[self._prev.token_type], 3062 this=this, 3063 
expression=self._parse_term(), 3064 ) 3065 elif self._match(TokenType.DQMARK): 3066 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3067 elif self._match_pair(TokenType.LT, TokenType.LT): 3068 this = self.expression( 3069 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3070 ) 3071 elif self._match_pair(TokenType.GT, TokenType.GT): 3072 this = self.expression( 3073 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3074 ) 3075 else: 3076 break 3077 3078 return this 3079 3080 def _parse_term(self) -> t.Optional[exp.Expression]: 3081 return self._parse_tokens(self._parse_factor, self.TERM) 3082 3083 def _parse_factor(self) -> t.Optional[exp.Expression]: 3084 return self._parse_tokens(self._parse_unary, self.FACTOR) 3085 3086 def _parse_unary(self) -> t.Optional[exp.Expression]: 3087 if self._match_set(self.UNARY_PARSERS): 3088 return self.UNARY_PARSERS[self._prev.token_type](self) 3089 return self._parse_at_time_zone(self._parse_type()) 3090 3091 def _parse_type(self) -> t.Optional[exp.Expression]: 3092 interval = self._parse_interval() 3093 if interval: 3094 return interval 3095 3096 index = self._index 3097 data_type = self._parse_types(check_func=True) 3098 this = self._parse_column() 3099 3100 if data_type: 3101 if isinstance(this, exp.Literal): 3102 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3103 if parser: 3104 return parser(self, this, data_type) 3105 return self.expression(exp.Cast, this=this, to=data_type) 3106 if not data_type.expressions: 3107 self._retreat(index) 3108 return self._parse_column() 3109 return self._parse_column_ops(data_type) 3110 3111 return this 3112 3113 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3114 this = self._parse_type() 3115 if not this: 3116 return None 3117 3118 return self.expression( 3119 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3120 ) 3121 3122 def _parse_types( 3123 self, check_func: bool = False, schema: bool = False 3124 ) -> t.Optional[exp.Expression]: 3125 index = self._index 3126 3127 prefix = self._match_text_seq("SYSUDTLIB", ".") 3128 3129 if not self._match_set(self.TYPE_TOKENS): 3130 return None 3131 3132 type_token = self._prev.token_type 3133 3134 if type_token == TokenType.PSEUDO_TYPE: 3135 return self.expression(exp.PseudoType, this=self._prev.text) 3136 3137 nested = type_token in self.NESTED_TYPE_TOKENS 3138 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3139 expressions = None 3140 maybe_func = False 3141 3142 if self._match(TokenType.L_PAREN): 3143 if is_struct: 3144 expressions = self._parse_csv(self._parse_struct_types) 3145 elif nested: 3146 expressions = self._parse_csv( 3147 lambda: self._parse_types(check_func=check_func, schema=schema) 3148 ) 3149 elif type_token in self.ENUM_TYPE_TOKENS: 3150 expressions = self._parse_csv(self._parse_equality) 3151 else: 3152 expressions = self._parse_csv(self._parse_type_size) 3153 3154 if not expressions or not self._match(TokenType.R_PAREN): 3155 self._retreat(index) 3156 return None 3157 3158 maybe_func = True 3159 3160 this: t.Optional[exp.Expression] = None 3161 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3162 3163 if nested and self._match(TokenType.LT): 3164 if is_struct: 3165 expressions = self._parse_csv(self._parse_struct_types) 3166 else: 3167 expressions = self._parse_csv( 3168 lambda: self._parse_types(check_func=check_func, schema=schema) 3169 ) 3170 3171 if not self._match(TokenType.GT): 3172 self.raise_error("Expecting >") 3173 3174 if 
self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3175 values = self._parse_csv(self._parse_conjunction) 3176 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3177 3178 if type_token in self.TIMESTAMPS: 3179 if self._match_text_seq("WITH", "TIME", "ZONE"): 3180 maybe_func = False 3181 tz_type = ( 3182 exp.DataType.Type.TIMETZ 3183 if type_token in self.TIMES 3184 else exp.DataType.Type.TIMESTAMPTZ 3185 ) 3186 this = exp.DataType(this=tz_type, expressions=expressions) 3187 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3188 maybe_func = False 3189 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3190 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3191 maybe_func = False 3192 elif type_token == TokenType.INTERVAL: 3193 if self._match_text_seq("YEAR", "TO", "MONTH"): 3194 span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()] 3195 elif self._match_text_seq("DAY", "TO", "SECOND"): 3196 span = [exp.IntervalDayToSecondSpan()] 3197 else: 3198 span = None 3199 3200 unit = not span and self._parse_var() 3201 if not unit: 3202 this = self.expression( 3203 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3204 ) 3205 else: 3206 this = self.expression(exp.Interval, unit=unit) 3207 3208 if maybe_func and check_func: 3209 index2 = self._index 3210 peek = self._parse_string() 3211 3212 if not peek: 3213 self._retreat(index) 3214 return None 3215 3216 self._retreat(index2) 3217 3218 if not this: 3219 this = exp.DataType( 3220 this=exp.DataType.Type[type_token.value], 3221 expressions=expressions, 3222 nested=nested, 3223 values=values, 3224 prefix=prefix, 3225 ) 3226 3227 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3228 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3229 3230 return this 3231 3232 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3233 this = self._parse_type() or self._parse_id_var() 3234 self._match(TokenType.COLON) 3235 return self._parse_column_def(this) 3236 3237 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3238 if not self._match_text_seq("AT", "TIME", "ZONE"): 3239 return this 3240 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3241 3242 def _parse_column(self) -> t.Optional[exp.Expression]: 3243 this = self._parse_field() 3244 if isinstance(this, exp.Identifier): 3245 this = self.expression(exp.Column, this=this) 3246 elif not this: 3247 return self._parse_bracket(this) 3248 return self._parse_column_ops(this) 3249 3250 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3251 this = self._parse_bracket(this) 3252 3253 while self._match_set(self.COLUMN_OPERATORS): 3254 op_token = self._prev.token_type 3255 op = self.COLUMN_OPERATORS.get(op_token) 3256 3257 if op_token == TokenType.DCOLON: 3258 field = self._parse_types() 3259 if not field: 3260 self.raise_error("Expected type") 3261 elif op and self._curr: 3262 self._advance() 3263 value = self._prev.text 3264 field = ( 3265 exp.Literal.number(value) 3266 if self._prev.token_type == TokenType.NUMBER 3267 else exp.Literal.string(value) 3268 ) 3269 else: 3270 field = self._parse_field(anonymous_func=True, any_token=True) 3271 3272 if isinstance(field, exp.Func): 3273 # bigquery allows function calls like x.y.count(...) 3274 # SAFE.SUBSTR(...) 
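# in that case, the dotted qualifier parsed so far is rewritten into a chain of
# exp.Dot nodes below, so the function call keeps its full path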
3275 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3276 this = self._replace_columns_with_dots(this) 3277 3278 if op: 3279 this = op(self, this, field) 3280 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3281 this = self.expression( 3282 exp.Column, 3283 this=field, 3284 table=this.this, 3285 db=this.args.get("table"), 3286 catalog=this.args.get("db"), 3287 ) 3288 else: 3289 this = self.expression(exp.Dot, this=this, expression=field) 3290 this = self._parse_bracket(this) 3291 return this 3292 3293 def _parse_primary(self) -> t.Optional[exp.Expression]: 3294 if self._match_set(self.PRIMARY_PARSERS): 3295 token_type = self._prev.token_type 3296 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3297 3298 if token_type == TokenType.STRING: 3299 expressions = [primary] 3300 while self._match(TokenType.STRING): 3301 expressions.append(exp.Literal.string(self._prev.text)) 3302 3303 if len(expressions) > 1: 3304 return self.expression(exp.Concat, expressions=expressions) 3305 3306 return primary 3307 3308 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3309 return exp.Literal.number(f"0.{self._prev.text}") 3310 3311 if self._match(TokenType.L_PAREN): 3312 comments = self._prev_comments 3313 query = self._parse_select() 3314 3315 if query: 3316 expressions = [query] 3317 else: 3318 expressions = self._parse_expressions() 3319 3320 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3321 3322 if isinstance(this, exp.Subqueryable): 3323 this = self._parse_set_operations( 3324 self._parse_subquery(this=this, parse_alias=False) 3325 ) 3326 elif len(expressions) > 1: 3327 this = self.expression(exp.Tuple, expressions=expressions) 3328 else: 3329 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3330 3331 if this: 3332 this.add_comments(comments) 3333 3334 self._match_r_paren(expression=this) 3335 return this 3336 3337 return None 3338 3339 def _parse_field( 3340 self, 3341 any_token: bool = False, 3342 tokens: t.Optional[t.Collection[TokenType]] = None, 3343 anonymous_func: bool = False, 3344 ) -> t.Optional[exp.Expression]: 3345 return ( 3346 self._parse_primary() 3347 or self._parse_function(anonymous=anonymous_func) 3348 or self._parse_id_var(any_token=any_token, tokens=tokens) 3349 ) 3350 3351 def _parse_function( 3352 self, 3353 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3354 anonymous: bool = False, 3355 optional_parens: bool = True, 3356 ) -> t.Optional[exp.Expression]: 3357 if not self._curr: 3358 return None 3359 3360 token_type = self._curr.token_type 3361 this = self._curr.text 3362 upper = this.upper() 3363 3364 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3365 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3366 self._advance() 3367 return parser(self) 3368 3369 if not self._next or self._next.token_type != TokenType.L_PAREN: 3370 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3371 self._advance() 3372 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3373 3374 return None 3375 3376 if token_type not in self.FUNC_TOKENS: 3377 return None 3378 3379 self._advance(2) 3380 3381 parser = self.FUNCTION_PARSERS.get(upper) 3382 if parser and not anonymous: 3383 this = parser(self) 3384 else: 3385 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3386 3387 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3388 this = 
self.expression(subquery_predicate, this=self._parse_select()) 3389 self._match_r_paren() 3390 return this 3391 3392 if functions is None: 3393 functions = self.FUNCTIONS 3394 3395 function = functions.get(upper) 3396 3397 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3398 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3399 3400 if function and not anonymous: 3401 func = self.validate_expression(function(args), args) 3402 if not self.NORMALIZE_FUNCTIONS: 3403 func.meta["name"] = this 3404 this = func 3405 else: 3406 this = self.expression(exp.Anonymous, this=this, expressions=args) 3407 3408 self._match_r_paren(this) 3409 return self._parse_window(this) 3410 3411 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3412 return self._parse_column_def(self._parse_id_var()) 3413 3414 def _parse_user_defined_function( 3415 self, kind: t.Optional[TokenType] = None 3416 ) -> t.Optional[exp.Expression]: 3417 this = self._parse_id_var() 3418 3419 while self._match(TokenType.DOT): 3420 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3421 3422 if not self._match(TokenType.L_PAREN): 3423 return this 3424 3425 expressions = self._parse_csv(self._parse_function_parameter) 3426 self._match_r_paren() 3427 return self.expression( 3428 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3429 ) 3430 3431 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3432 literal = self._parse_primary() 3433 if literal: 3434 return self.expression(exp.Introducer, this=token.text, expression=literal) 3435 3436 return self.expression(exp.Identifier, this=token.text) 3437 3438 def _parse_session_parameter(self) -> exp.SessionParameter: 3439 kind = None 3440 this = self._parse_id_var() or self._parse_primary() 3441 3442 if this and self._match(TokenType.DOT): 3443 kind = this.name 3444 this = self._parse_var() or self._parse_primary() 3445 3446 return self.expression(exp.SessionParameter, this=this, kind=kind) 3447 3448 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3449 index = self._index 3450 3451 if self._match(TokenType.L_PAREN): 3452 expressions = self._parse_csv(self._parse_id_var) 3453 3454 if not self._match(TokenType.R_PAREN): 3455 self._retreat(index) 3456 else: 3457 expressions = [self._parse_id_var()] 3458 3459 if self._match_set(self.LAMBDAS): 3460 return self.LAMBDAS[self._prev.token_type](self, expressions) 3461 3462 self._retreat(index) 3463 3464 this: t.Optional[exp.Expression] 3465 3466 if self._match(TokenType.DISTINCT): 3467 this = self.expression( 3468 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3469 ) 3470 else: 3471 this = self._parse_select_or_expression(alias=alias) 3472 3473 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3474 3475 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3476 index = self._index 3477 3478 if not self.errors: 3479 try: 3480 if self._parse_select(nested=True): 3481 return this 3482 except ParseError: 3483 pass 3484 finally: 3485 self.errors.clear() 3486 self._retreat(index) 3487 3488 if not self._match(TokenType.L_PAREN): 3489 return this 3490 3491 args = self._parse_csv( 3492 lambda: self._parse_constraint() 3493 or self._parse_column_def(self._parse_field(any_token=True)) 3494 ) 3495 3496 self._match_r_paren() 3497 return self.expression(exp.Schema, this=this, expressions=args) 3498 3499 def _parse_column_def(self, this: 
t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3500 # column defs are not really columns, they're identifiers 3501 if isinstance(this, exp.Column): 3502 this = this.this 3503 3504 kind = self._parse_types(schema=True) 3505 3506 if self._match_text_seq("FOR", "ORDINALITY"): 3507 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3508 3509 constraints = [] 3510 while True: 3511 constraint = self._parse_column_constraint() 3512 if not constraint: 3513 break 3514 constraints.append(constraint) 3515 3516 if not kind and not constraints: 3517 return this 3518 3519 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3520 3521 def _parse_auto_increment( 3522 self, 3523 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3524 start = None 3525 increment = None 3526 3527 if self._match(TokenType.L_PAREN, advance=False): 3528 args = self._parse_wrapped_csv(self._parse_bitwise) 3529 start = seq_get(args, 0) 3530 increment = seq_get(args, 1) 3531 elif self._match_text_seq("START"): 3532 start = self._parse_bitwise() 3533 self._match_text_seq("INCREMENT") 3534 increment = self._parse_bitwise() 3535 3536 if start and increment: 3537 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3538 3539 return exp.AutoIncrementColumnConstraint() 3540 3541 def _parse_compress(self) -> exp.CompressColumnConstraint: 3542 if self._match(TokenType.L_PAREN, advance=False): 3543 return self.expression( 3544 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3545 ) 3546 3547 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3548 3549 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3550 if self._match_text_seq("BY", "DEFAULT"): 3551 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3552 this = self.expression( 3553 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3554 ) 3555 else: 3556 self._match_text_seq("ALWAYS") 3557 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3558 3559 self._match(TokenType.ALIAS) 3560 identity = self._match_text_seq("IDENTITY") 3561 3562 if self._match(TokenType.L_PAREN): 3563 if self._match_text_seq("START", "WITH"): 3564 this.set("start", self._parse_bitwise()) 3565 if self._match_text_seq("INCREMENT", "BY"): 3566 this.set("increment", self._parse_bitwise()) 3567 if self._match_text_seq("MINVALUE"): 3568 this.set("minvalue", self._parse_bitwise()) 3569 if self._match_text_seq("MAXVALUE"): 3570 this.set("maxvalue", self._parse_bitwise()) 3571 3572 if self._match_text_seq("CYCLE"): 3573 this.set("cycle", True) 3574 elif self._match_text_seq("NO", "CYCLE"): 3575 this.set("cycle", False) 3576 3577 if not identity: 3578 this.set("expression", self._parse_bitwise()) 3579 3580 self._match_r_paren() 3581 3582 return this 3583 3584 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3585 self._match_text_seq("LENGTH") 3586 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3587 3588 def _parse_not_constraint( 3589 self, 3590 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3591 if self._match_text_seq("NULL"): 3592 return self.expression(exp.NotNullColumnConstraint) 3593 if self._match_text_seq("CASESPECIFIC"): 3594 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3595 return None 3596 3597 def _parse_column_constraint(self) -> 
t.Optional[exp.Expression]: 3598 if self._match(TokenType.CONSTRAINT): 3599 this = self._parse_id_var() 3600 else: 3601 this = None 3602 3603 if self._match_texts(self.CONSTRAINT_PARSERS): 3604 return self.expression( 3605 exp.ColumnConstraint, 3606 this=this, 3607 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3608 ) 3609 3610 return this 3611 3612 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3613 if not self._match(TokenType.CONSTRAINT): 3614 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3615 3616 this = self._parse_id_var() 3617 expressions = [] 3618 3619 while True: 3620 constraint = self._parse_unnamed_constraint() or self._parse_function() 3621 if not constraint: 3622 break 3623 expressions.append(constraint) 3624 3625 return self.expression(exp.Constraint, this=this, expressions=expressions) 3626 3627 def _parse_unnamed_constraint( 3628 self, constraints: t.Optional[t.Collection[str]] = None 3629 ) -> t.Optional[exp.Expression]: 3630 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3631 return None 3632 3633 constraint = self._prev.text.upper() 3634 if constraint not in self.CONSTRAINT_PARSERS: 3635 self.raise_error(f"No parser found for schema constraint {constraint}.") 3636 3637 return self.CONSTRAINT_PARSERS[constraint](self) 3638 3639 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3640 self._match_text_seq("KEY") 3641 return self.expression( 3642 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3643 ) 3644 3645 def _parse_key_constraint_options(self) -> t.List[str]: 3646 options = [] 3647 while True: 3648 if not self._curr: 3649 break 3650 3651 if self._match(TokenType.ON): 3652 action = None 3653 on = self._advance_any() and self._prev.text 3654 3655 if self._match_text_seq("NO", "ACTION"): 3656 action = "NO ACTION" 3657 elif self._match_text_seq("CASCADE"): 3658 action = "CASCADE" 3659 elif self._match_pair(TokenType.SET, TokenType.NULL): 3660 action = "SET NULL" 3661 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3662 action = "SET DEFAULT" 3663 else: 3664 self.raise_error("Invalid key constraint") 3665 3666 options.append(f"ON {on} {action}") 3667 elif self._match_text_seq("NOT", "ENFORCED"): 3668 options.append("NOT ENFORCED") 3669 elif self._match_text_seq("DEFERRABLE"): 3670 options.append("DEFERRABLE") 3671 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3672 options.append("INITIALLY DEFERRED") 3673 elif self._match_text_seq("NORELY"): 3674 options.append("NORELY") 3675 elif self._match_text_seq("MATCH", "FULL"): 3676 options.append("MATCH FULL") 3677 else: 3678 break 3679 3680 return options 3681 3682 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3683 if match and not self._match(TokenType.REFERENCES): 3684 return None 3685 3686 expressions = None 3687 this = self._parse_table(schema=True) 3688 options = self._parse_key_constraint_options() 3689 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3690 3691 def _parse_foreign_key(self) -> exp.ForeignKey: 3692 expressions = self._parse_wrapped_id_vars() 3693 reference = self._parse_references() 3694 options = {} 3695 3696 while self._match(TokenType.ON): 3697 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3698 self.raise_error("Expected DELETE or UPDATE") 3699 3700 kind = self._prev.text.lower() 3701 3702 if self._match_text_seq("NO", "ACTION"): 3703 action = "NO ACTION" 3704 elif 
self._match(TokenType.SET): 3705 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3706 action = "SET " + self._prev.text.upper() 3707 else: 3708 self._advance() 3709 action = self._prev.text.upper() 3710 3711 options[kind] = action 3712 3713 return self.expression( 3714 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3715 ) 3716 3717 def _parse_primary_key( 3718 self, wrapped_optional: bool = False, in_props: bool = False 3719 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3720 desc = ( 3721 self._match_set((TokenType.ASC, TokenType.DESC)) 3722 and self._prev.token_type == TokenType.DESC 3723 ) 3724 3725 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3726 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3727 3728 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3729 options = self._parse_key_constraint_options() 3730 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3731 3732 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3733 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3734 return this 3735 3736 bracket_kind = self._prev.token_type 3737 3738 if self._match(TokenType.COLON): 3739 expressions: t.List[t.Optional[exp.Expression]] = [ 3740 self.expression(exp.Slice, expression=self._parse_conjunction()) 3741 ] 3742 else: 3743 expressions = self._parse_csv( 3744 lambda: self._parse_slice( 3745 self._parse_alias(self._parse_conjunction(), explicit=True) 3746 ) 3747 ) 3748 3749 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3750 if bracket_kind == TokenType.L_BRACE: 3751 this = self.expression(exp.Struct, expressions=expressions) 3752 elif not this or this.name.upper() == "ARRAY": 3753 this = self.expression(exp.Array, expressions=expressions) 3754 else: 3755 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3756 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3757 3758 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3759 self.raise_error("Expected ]") 3760 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3761 self.raise_error("Expected }") 3762 3763 self._add_comments(this) 3764 return self._parse_bracket(this) 3765 3766 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3767 if self._match(TokenType.COLON): 3768 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3769 return this 3770 3771 def _parse_case(self) -> t.Optional[exp.Expression]: 3772 ifs = [] 3773 default = None 3774 3775 comments = self._prev_comments 3776 expression = self._parse_conjunction() 3777 3778 while self._match(TokenType.WHEN): 3779 this = self._parse_conjunction() 3780 self._match(TokenType.THEN) 3781 then = self._parse_conjunction() 3782 ifs.append(self.expression(exp.If, this=this, true=then)) 3783 3784 if self._match(TokenType.ELSE): 3785 default = self._parse_conjunction() 3786 3787 if not self._match(TokenType.END): 3788 self.raise_error("Expected END after CASE", self._prev) 3789 3790 return self._parse_window( 3791 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 3792 ) 3793 3794 def _parse_if(self) -> t.Optional[exp.Expression]: 3795 if self._match(TokenType.L_PAREN): 3796 args = self._parse_csv(self._parse_conjunction) 3797 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 3798 self._match_r_paren() 3799 else: 3800 index = self._index - 1 3801 condition = self._parse_conjunction() 3802 3803 if not condition: 3804 self._retreat(index) 3805 return None 3806 3807 self._match(TokenType.THEN) 3808 true = self._parse_conjunction() 3809 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3810 self._match(TokenType.END) 3811 this = self.expression(exp.If, this=condition, true=true, false=false) 3812 3813 return self._parse_window(this) 3814 3815 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 3816 if not self._match_text_seq("VALUE", "FOR"): 3817 self._retreat(self._index - 1) 3818 return None 3819 3820 return self.expression( 3821 exp.NextValueFor, 3822 this=self._parse_column(), 3823 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 3824 ) 3825 3826 def _parse_extract(self) -> exp.Extract: 3827 this = self._parse_function() or self._parse_var() or self._parse_type() 3828 3829 if self._match(TokenType.FROM): 3830 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3831 3832 if not self._match(TokenType.COMMA): 3833 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3834 3835 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3836 3837 def _parse_any_value(self) -> exp.AnyValue: 3838 this = self._parse_lambda() 3839 is_max = None 3840 having = None 3841 3842 if self._match(TokenType.HAVING): 3843 self._match_texts(("MAX", "MIN")) 3844 is_max = self._prev.text == "MAX" 3845 having = self._parse_column() 3846 3847 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3848 3849 def _parse_cast(self, strict: bool) -> exp.Expression: 3850 this = self._parse_conjunction() 3851 3852 if not self._match(TokenType.ALIAS): 3853 if self._match(TokenType.COMMA): 3854 return self.expression( 3855 exp.CastToStrType, this=this, expression=self._parse_string() 3856 ) 3857 else: 3858 self.raise_error("Expected AS after CAST") 3859 3860 fmt = None 3861 to = self._parse_types() 3862 3863 if not to: 3864 self.raise_error("Expected TYPE after CAST") 3865 elif to.this == exp.DataType.Type.CHAR: 3866 if self._match(TokenType.CHARACTER_SET): 3867 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3868 elif self._match(TokenType.FORMAT): 3869 fmt_string = self._parse_string() 3870 fmt = self._parse_at_time_zone(fmt_string) 3871 3872 if to.this in exp.DataType.TEMPORAL_TYPES: 3873 this = self.expression( 3874 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3875 this=this, 3876 format=exp.Literal.string( 3877 format_time( 3878 fmt_string.this if fmt_string else "", 3879 self.FORMAT_MAPPING or self.TIME_MAPPING, 3880 self.FORMAT_TRIE or self.TIME_TRIE, 3881 ) 3882 ), 3883 ) 3884 3885 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3886 this.set("zone", fmt.args["zone"]) 3887 3888 return this 3889 3890 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3891 3892 def _parse_concat(self) -> t.Optional[exp.Expression]: 3893 args = self._parse_csv(self._parse_conjunction) 3894 if self.CONCAT_NULL_OUTPUTS_STRING: 3895 args = [ 3896 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3897 for arg in args 3898 if arg 3899 ] 3900 3901 # Some dialects (e.g. 
Trino) don't allow a single-argument CONCAT call, so when 3902 # we find such a call we replace it with its argument. 3903 if len(args) == 1: 3904 return args[0] 3905 3906 return self.expression( 3907 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3908 ) 3909 3910 def _parse_string_agg(self) -> exp.Expression: 3911 if self._match(TokenType.DISTINCT): 3912 args: t.List[t.Optional[exp.Expression]] = [ 3913 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3914 ] 3915 if self._match(TokenType.COMMA): 3916 args.extend(self._parse_csv(self._parse_conjunction)) 3917 else: 3918 args = self._parse_csv(self._parse_conjunction) 3919 3920 index = self._index 3921 if not self._match(TokenType.R_PAREN) and args: 3922 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3923 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 3924 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 3925 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 3926 3927 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3928 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3929 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3930 if not self._match_text_seq("WITHIN", "GROUP"): 3931 self._retreat(index) 3932 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3933 3934 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3935 order = self._parse_order(this=seq_get(args, 0)) 3936 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3937 3938 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3939 this = self._parse_bitwise() 3940 3941 if self._match(TokenType.USING): 3942 to: t.Optional[exp.Expression] = self.expression( 3943 exp.CharacterSet, this=self._parse_var() 3944 ) 3945 elif self._match(TokenType.COMMA): 3946 to = self._parse_types() 3947 else: 3948 to = None 3949 3950 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3951 3952 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3953 """ 3954 There are generally two variants of the DECODE function: 3955 3956 - DECODE(bin, charset) 3957 - DECODE(expression, search, result [, search, result] ... [, default]) 3958 3959 The second variant will always be parsed into a CASE expression. Note that NULL 3960 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3961 instead of relying on pattern matching. 
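Example of the second variant and the CASE expression it is parsed into:
DECODE(x, 1, 'one', 2, 'two', 'other') becomes
CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END.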
3962 """ 3963 args = self._parse_csv(self._parse_conjunction) 3964 3965 if len(args) < 3: 3966 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3967 3968 expression, *expressions = args 3969 if not expression: 3970 return None 3971 3972 ifs = [] 3973 for search, result in zip(expressions[::2], expressions[1::2]): 3974 if not search or not result: 3975 return None 3976 3977 if isinstance(search, exp.Literal): 3978 ifs.append( 3979 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3980 ) 3981 elif isinstance(search, exp.Null): 3982 ifs.append( 3983 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3984 ) 3985 else: 3986 cond = exp.or_( 3987 exp.EQ(this=expression.copy(), expression=search), 3988 exp.and_( 3989 exp.Is(this=expression.copy(), expression=exp.Null()), 3990 exp.Is(this=search.copy(), expression=exp.Null()), 3991 copy=False, 3992 ), 3993 copy=False, 3994 ) 3995 ifs.append(exp.If(this=cond, true=result)) 3996 3997 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3998 3999 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4000 self._match_text_seq("KEY") 4001 key = self._parse_field() 4002 self._match(TokenType.COLON) 4003 self._match_text_seq("VALUE") 4004 value = self._parse_field() 4005 4006 if not key and not value: 4007 return None 4008 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4009 4010 def _parse_json_object(self) -> exp.JSONObject: 4011 star = self._parse_star() 4012 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 4013 4014 null_handling = None 4015 if self._match_text_seq("NULL", "ON", "NULL"): 4016 null_handling = "NULL ON NULL" 4017 elif self._match_text_seq("ABSENT", "ON", "NULL"): 4018 null_handling = "ABSENT ON NULL" 4019 4020 unique_keys = None 4021 if self._match_text_seq("WITH", "UNIQUE"): 4022 unique_keys = True 4023 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4024 unique_keys = False 4025 4026 self._match_text_seq("KEYS") 4027 4028 return_type = self._match_text_seq("RETURNING") and self._parse_type() 4029 format_json = self._match_text_seq("FORMAT", "JSON") 4030 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4031 4032 return self.expression( 4033 exp.JSONObject, 4034 expressions=expressions, 4035 null_handling=null_handling, 4036 unique_keys=unique_keys, 4037 return_type=return_type, 4038 format_json=format_json, 4039 encoding=encoding, 4040 ) 4041 4042 def _parse_logarithm(self) -> exp.Func: 4043 # Default argument order is base, expression 4044 args = self._parse_csv(self._parse_range) 4045 4046 if len(args) > 1: 4047 if not self.LOG_BASE_FIRST: 4048 args.reverse() 4049 return exp.Log.from_arg_list(args) 4050 4051 return self.expression( 4052 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4053 ) 4054 4055 def _parse_match_against(self) -> exp.MatchAgainst: 4056 expressions = self._parse_csv(self._parse_column) 4057 4058 self._match_text_seq(")", "AGAINST", "(") 4059 4060 this = self._parse_string() 4061 4062 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4063 modifier = "IN NATURAL LANGUAGE MODE" 4064 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4065 modifier = f"{modifier} WITH QUERY EXPANSION" 4066 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4067 modifier = "IN BOOLEAN MODE" 4068 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4069 modifier = "WITH QUERY EXPANSION" 4070 
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )
    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )
    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)

        return node
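The private _parse_* helpers above are exercised indirectly through the public API. As a
minimal sketch (assuming the default dialect, which routes DECODE through _parse_decode),
the DECODE-to-CASE rewrite described in _parse_decode's docstring can be observed with:

    import sqlglot
    from sqlglot import exp

    # DECODE(expression, search, result, default) is parsed into a CASE expression.
    tree = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other') FROM t")
    case = tree.find(exp.Case)
    print(case.sql())  # CASE WHEN x = 1 THEN 'one' ELSE 'other' END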
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
):
    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.reset()
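As a minimal usage sketch, a Parser can be instantiated directly when finer control over
error handling is needed; most callers go through sqlglot.parse or sqlglot.parse_one instead:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    # Collect up to five error messages before raising, instead of failing fast.
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
    tokens = Tokenizer().tokenize("SELECT a FROM t")
    (tree,) = parser.parse(tokens, sql="SELECT a FROM t")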
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
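A short sketch of the token-to-tree round trip, producing one tree per statement:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    print([tree.sql() for tree in trees])  # ['SELECT 1', 'SELECT 2']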
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
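A small sketch, assuming exp.Condition is registered in EXPRESSION_PARSERS (as it is for
the base Parser):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    # The token list is parsed into each candidate type until one succeeds.
    tokens = Tokenizer().tokenize("x = 1")
    (condition,) = Parser().parse_into(exp.Condition, tokens, sql="x = 1")
    assert isinstance(condition, exp.EQ)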
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
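For example, with error_level set to ErrorLevel.RAISE, errors accumulate during parsing and
check_errors (invoked internally by _parse) raises them in one batch, capped at max_errors:

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=2)
    try:
        parser.parse(Tokenizer().tokenize("SELECT (1"), sql="SELECT (1")
    except ParseError as e:
        print(e.errors)  # structured error details, at most max_errors messages concatenated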
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
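Under the default ErrorLevel.IMMEDIATE, the first error is raised on the spot rather than
recorded; a minimal sketch:

    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    try:
        # The unbalanced paren triggers raise_error("Expecting )") immediately.
        Parser().parse(Tokenizer().tokenize("SELECT (1"), sql="SELECT (1")
    except ParseError as e:
        print(e.errors[0]["description"])  # 'Expecting )'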
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
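This is the workhorse used throughout the _parse_* methods above. A hypothetical dialect
hook might use it as follows (MyParser and _parse_is_not_null are illustrative names, not
part of the library):

    from sqlglot import exp
    from sqlglot.parser import Parser

    class MyParser(Parser):
        def _parse_is_not_null(self, this):
            # Builds Not(Is(this, Null())); each node is validated as it is created.
            is_null = self.expression(exp.Is, this=this, expression=exp.Null())
            return self.expression(exp.Not, this=is_null)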
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
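A small sketch of the validation behavior (under the default ErrorLevel.IMMEDIATE, the
first missing mandatory argument raises):

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    try:
        # exp.If requires its "this" (condition) and "true" arguments.
        Parser().validate_expression(exp.If())
    except ParseError as e:
        print(e)  # roughly: Required keyword: 'this' missing for <class 'sqlglot.expressions.If'>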