# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 
TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.FIXEDSTRING, 140 TokenType.FLOAT, 141 TokenType.DOUBLE, 142 TokenType.CHAR, 143 TokenType.NCHAR, 144 TokenType.VARCHAR, 145 TokenType.NVARCHAR, 146 TokenType.TEXT, 147 TokenType.MEDIUMTEXT, 148 TokenType.LONGTEXT, 149 TokenType.MEDIUMBLOB, 150 TokenType.LONGBLOB, 151 TokenType.BINARY, 152 TokenType.VARBINARY, 153 TokenType.JSON, 154 TokenType.JSONB, 155 TokenType.INTERVAL, 156 TokenType.TIME, 157 TokenType.TIMETZ, 158 TokenType.TIMESTAMP, 159 TokenType.TIMESTAMPTZ, 160 TokenType.TIMESTAMPLTZ, 161 TokenType.DATETIME, 162 TokenType.DATETIME64, 163 TokenType.DATE, 164 TokenType.INT4RANGE, 165 TokenType.INT4MULTIRANGE, 166 TokenType.INT8RANGE, 167 TokenType.INT8MULTIRANGE, 168 TokenType.NUMRANGE, 169 TokenType.NUMMULTIRANGE, 170 TokenType.TSRANGE, 171 TokenType.TSMULTIRANGE, 172 TokenType.TSTZRANGE, 173 TokenType.TSTZMULTIRANGE, 174 TokenType.DATERANGE, 175 TokenType.DATEMULTIRANGE, 176 TokenType.DECIMAL, 177 TokenType.BIGDECIMAL, 178 TokenType.UUID, 179 TokenType.GEOGRAPHY, 180 TokenType.GEOMETRY, 181 TokenType.HLLSKETCH, 182 TokenType.HSTORE, 183 TokenType.PSEUDO_TYPE, 184 TokenType.SUPER, 185 TokenType.SERIAL, 186 TokenType.SMALLSERIAL, 187 TokenType.BIGSERIAL, 188 TokenType.XML, 189 TokenType.UNIQUEIDENTIFIER, 190 TokenType.USERDEFINED, 191 TokenType.MONEY, 192 TokenType.SMALLMONEY, 193 TokenType.ROWVERSION, 194 TokenType.IMAGE, 195 TokenType.VARIANT, 196 TokenType.OBJECT, 197 TokenType.INET, 198 TokenType.IPADDRESS, 199 TokenType.IPPREFIX, 200 TokenType.UNKNOWN, 201 TokenType.NULL, 202 *ENUM_TYPE_TOKENS, 203 *NESTED_TYPE_TOKENS, 204 } 205 206 SUBQUERY_PREDICATES = { 207 TokenType.ANY: exp.Any, 208 TokenType.ALL: exp.All, 209 TokenType.EXISTS: exp.Exists, 210 TokenType.SOME: exp.Any, 211 } 212 213 RESERVED_KEYWORDS = { 214 *Tokenizer.SINGLE_TOKENS.values(), 215 TokenType.SELECT, 216 } 217 218 DB_CREATABLES = { 219 TokenType.DATABASE, 220 TokenType.SCHEMA, 221 TokenType.TABLE, 222 
TokenType.VIEW, 223 TokenType.DICTIONARY, 224 } 225 226 CREATABLES = { 227 TokenType.COLUMN, 228 TokenType.FUNCTION, 229 TokenType.INDEX, 230 TokenType.PROCEDURE, 231 *DB_CREATABLES, 232 } 233 234 # Tokens that can represent identifiers 235 ID_VAR_TOKENS = { 236 TokenType.VAR, 237 TokenType.ANTI, 238 TokenType.APPLY, 239 TokenType.ASC, 240 TokenType.AUTO_INCREMENT, 241 TokenType.BEGIN, 242 TokenType.CACHE, 243 TokenType.CASE, 244 TokenType.COLLATE, 245 TokenType.COMMAND, 246 TokenType.COMMENT, 247 TokenType.COMMIT, 248 TokenType.CONSTRAINT, 249 TokenType.DEFAULT, 250 TokenType.DELETE, 251 TokenType.DESC, 252 TokenType.DESCRIBE, 253 TokenType.DICTIONARY, 254 TokenType.DIV, 255 TokenType.END, 256 TokenType.EXECUTE, 257 TokenType.ESCAPE, 258 TokenType.FALSE, 259 TokenType.FIRST, 260 TokenType.FILTER, 261 TokenType.FORMAT, 262 TokenType.FULL, 263 TokenType.IS, 264 TokenType.ISNULL, 265 TokenType.INTERVAL, 266 TokenType.KEEP, 267 TokenType.LEFT, 268 TokenType.LOAD, 269 TokenType.MERGE, 270 TokenType.NATURAL, 271 TokenType.NEXT, 272 TokenType.OFFSET, 273 TokenType.ORDINALITY, 274 TokenType.OVERWRITE, 275 TokenType.PARTITION, 276 TokenType.PERCENT, 277 TokenType.PIVOT, 278 TokenType.PRAGMA, 279 TokenType.RANGE, 280 TokenType.REFERENCES, 281 TokenType.RIGHT, 282 TokenType.ROW, 283 TokenType.ROWS, 284 TokenType.SEMI, 285 TokenType.SET, 286 TokenType.SETTINGS, 287 TokenType.SHOW, 288 TokenType.TEMPORARY, 289 TokenType.TOP, 290 TokenType.TRUE, 291 TokenType.UNIQUE, 292 TokenType.UNPIVOT, 293 TokenType.UPDATE, 294 TokenType.VOLATILE, 295 TokenType.WINDOW, 296 *CREATABLES, 297 *SUBQUERY_PREDICATES, 298 *TYPE_TOKENS, 299 *NO_PAREN_FUNCTIONS, 300 } 301 302 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 303 304 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 305 TokenType.APPLY, 306 TokenType.ASOF, 307 TokenType.FULL, 308 TokenType.LEFT, 309 TokenType.LOCK, 310 TokenType.NATURAL, 311 TokenType.OFFSET, 312 TokenType.RIGHT, 313 TokenType.WINDOW, 314 } 315 316 COMMENT_TABLE_ALIAS_TOKENS = 
TABLE_ALIAS_TOKENS - {TokenType.IS} 317 318 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 319 320 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 321 322 FUNC_TOKENS = { 323 TokenType.COMMAND, 324 TokenType.CURRENT_DATE, 325 TokenType.CURRENT_DATETIME, 326 TokenType.CURRENT_TIMESTAMP, 327 TokenType.CURRENT_TIME, 328 TokenType.CURRENT_USER, 329 TokenType.FILTER, 330 TokenType.FIRST, 331 TokenType.FORMAT, 332 TokenType.GLOB, 333 TokenType.IDENTIFIER, 334 TokenType.INDEX, 335 TokenType.ISNULL, 336 TokenType.ILIKE, 337 TokenType.INSERT, 338 TokenType.LIKE, 339 TokenType.MERGE, 340 TokenType.OFFSET, 341 TokenType.PRIMARY_KEY, 342 TokenType.RANGE, 343 TokenType.REPLACE, 344 TokenType.RLIKE, 345 TokenType.ROW, 346 TokenType.UNNEST, 347 TokenType.VAR, 348 TokenType.LEFT, 349 TokenType.RIGHT, 350 TokenType.DATE, 351 TokenType.DATETIME, 352 TokenType.TABLE, 353 TokenType.TIMESTAMP, 354 TokenType.TIMESTAMPTZ, 355 TokenType.WINDOW, 356 TokenType.XOR, 357 *TYPE_TOKENS, 358 *SUBQUERY_PREDICATES, 359 } 360 361 CONJUNCTION = { 362 TokenType.AND: exp.And, 363 TokenType.OR: exp.Or, 364 } 365 366 EQUALITY = { 367 TokenType.EQ: exp.EQ, 368 TokenType.NEQ: exp.NEQ, 369 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 370 } 371 372 COMPARISON = { 373 TokenType.GT: exp.GT, 374 TokenType.GTE: exp.GTE, 375 TokenType.LT: exp.LT, 376 TokenType.LTE: exp.LTE, 377 } 378 379 BITWISE = { 380 TokenType.AMP: exp.BitwiseAnd, 381 TokenType.CARET: exp.BitwiseXor, 382 TokenType.PIPE: exp.BitwiseOr, 383 TokenType.DPIPE: exp.DPipe, 384 } 385 386 TERM = { 387 TokenType.DASH: exp.Sub, 388 TokenType.PLUS: exp.Add, 389 TokenType.MOD: exp.Mod, 390 TokenType.COLLATE: exp.Collate, 391 } 392 393 FACTOR = { 394 TokenType.DIV: exp.IntDiv, 395 TokenType.LR_ARROW: exp.Distance, 396 TokenType.SLASH: exp.Div, 397 TokenType.STAR: exp.Mul, 398 } 399 400 TIMES = { 401 TokenType.TIME, 402 TokenType.TIMETZ, 403 } 404 405 TIMESTAMPS = { 406 TokenType.TIMESTAMP, 407 TokenType.TIMESTAMPTZ, 408 TokenType.TIMESTAMPLTZ, 409 
*TIMES, 410 } 411 412 SET_OPERATIONS = { 413 TokenType.UNION, 414 TokenType.INTERSECT, 415 TokenType.EXCEPT, 416 } 417 418 JOIN_METHODS = { 419 TokenType.NATURAL, 420 TokenType.ASOF, 421 } 422 423 JOIN_SIDES = { 424 TokenType.LEFT, 425 TokenType.RIGHT, 426 TokenType.FULL, 427 } 428 429 JOIN_KINDS = { 430 TokenType.INNER, 431 TokenType.OUTER, 432 TokenType.CROSS, 433 TokenType.SEMI, 434 TokenType.ANTI, 435 } 436 437 JOIN_HINTS: t.Set[str] = set() 438 439 LAMBDAS = { 440 TokenType.ARROW: lambda self, expressions: self.expression( 441 exp.Lambda, 442 this=self._replace_lambda( 443 self._parse_conjunction(), 444 {node.name for node in expressions}, 445 ), 446 expressions=expressions, 447 ), 448 TokenType.FARROW: lambda self, expressions: self.expression( 449 exp.Kwarg, 450 this=exp.var(expressions[0].name), 451 expression=self._parse_conjunction(), 452 ), 453 } 454 455 COLUMN_OPERATORS = { 456 TokenType.DOT: None, 457 TokenType.DCOLON: lambda self, this, to: self.expression( 458 exp.Cast if self.STRICT_CAST else exp.TryCast, 459 this=this, 460 to=to, 461 ), 462 TokenType.ARROW: lambda self, this, path: self.expression( 463 exp.JSONExtract, 464 this=this, 465 expression=path, 466 ), 467 TokenType.DARROW: lambda self, this, path: self.expression( 468 exp.JSONExtractScalar, 469 this=this, 470 expression=path, 471 ), 472 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 473 exp.JSONBExtract, 474 this=this, 475 expression=path, 476 ), 477 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 478 exp.JSONBExtractScalar, 479 this=this, 480 expression=path, 481 ), 482 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 483 exp.JSONBContains, 484 this=this, 485 expression=key, 486 ), 487 } 488 489 EXPRESSION_PARSERS = { 490 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 491 exp.Column: lambda self: self._parse_column(), 492 exp.Condition: lambda self: self._parse_conjunction(), 493 exp.DataType: lambda self: 
self._parse_types(allow_identifiers=False), 494 exp.Expression: lambda self: self._parse_statement(), 495 exp.From: lambda self: self._parse_from(), 496 exp.Group: lambda self: self._parse_group(), 497 exp.Having: lambda self: self._parse_having(), 498 exp.Identifier: lambda self: self._parse_id_var(), 499 exp.Join: lambda self: self._parse_join(), 500 exp.Lambda: lambda self: self._parse_lambda(), 501 exp.Lateral: lambda self: self._parse_lateral(), 502 exp.Limit: lambda self: self._parse_limit(), 503 exp.Offset: lambda self: self._parse_offset(), 504 exp.Order: lambda self: self._parse_order(), 505 exp.Ordered: lambda self: self._parse_ordered(), 506 exp.Properties: lambda self: self._parse_properties(), 507 exp.Qualify: lambda self: self._parse_qualify(), 508 exp.Returning: lambda self: self._parse_returning(), 509 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 510 exp.Table: lambda self: self._parse_table_parts(), 511 exp.TableAlias: lambda self: self._parse_table_alias(), 512 exp.Where: lambda self: self._parse_where(), 513 exp.Window: lambda self: self._parse_named_window(), 514 exp.With: lambda self: self._parse_with(), 515 "JOIN_TYPE": lambda self: self._parse_join_parts(), 516 } 517 518 STATEMENT_PARSERS = { 519 TokenType.ALTER: lambda self: self._parse_alter(), 520 TokenType.BEGIN: lambda self: self._parse_transaction(), 521 TokenType.CACHE: lambda self: self._parse_cache(), 522 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 523 TokenType.COMMENT: lambda self: self._parse_comment(), 524 TokenType.CREATE: lambda self: self._parse_create(), 525 TokenType.DELETE: lambda self: self._parse_delete(), 526 TokenType.DESC: lambda self: self._parse_describe(), 527 TokenType.DESCRIBE: lambda self: self._parse_describe(), 528 TokenType.DROP: lambda self: self._parse_drop(), 529 TokenType.FROM: lambda self: exp.select("*").from_( 530 t.cast(exp.From, self._parse_from(skip_from_token=True)) 531 ), 532 TokenType.INSERT: lambda 
self: self._parse_insert(), 533 TokenType.LOAD: lambda self: self._parse_load(), 534 TokenType.MERGE: lambda self: self._parse_merge(), 535 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 536 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 537 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 538 TokenType.SET: lambda self: self._parse_set(), 539 TokenType.UNCACHE: lambda self: self._parse_uncache(), 540 TokenType.UPDATE: lambda self: self._parse_update(), 541 TokenType.USE: lambda self: self.expression( 542 exp.Use, 543 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 544 and exp.var(self._prev.text), 545 this=self._parse_table(schema=False), 546 ), 547 } 548 549 UNARY_PARSERS = { 550 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 551 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 552 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 553 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 554 } 555 556 PRIMARY_PARSERS = { 557 TokenType.STRING: lambda self, token: self.expression( 558 exp.Literal, this=token.text, is_string=True 559 ), 560 TokenType.NUMBER: lambda self, token: self.expression( 561 exp.Literal, this=token.text, is_string=False 562 ), 563 TokenType.STAR: lambda self, _: self.expression( 564 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 565 ), 566 TokenType.NULL: lambda self, _: self.expression(exp.Null), 567 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 568 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 569 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 570 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 571 TokenType.BYTE_STRING: lambda self, token: 
self.expression(exp.ByteString, this=token.text), 572 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 573 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 574 exp.National, this=token.text 575 ), 576 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 577 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 578 } 579 580 PLACEHOLDER_PARSERS = { 581 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 582 TokenType.PARAMETER: lambda self: self._parse_parameter(), 583 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 584 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 585 else None, 586 } 587 588 RANGE_PARSERS = { 589 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 590 TokenType.GLOB: binary_range_parser(exp.Glob), 591 TokenType.ILIKE: binary_range_parser(exp.ILike), 592 TokenType.IN: lambda self, this: self._parse_in(this), 593 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 594 TokenType.IS: lambda self, this: self._parse_is(this), 595 TokenType.LIKE: binary_range_parser(exp.Like), 596 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 597 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 598 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 599 } 600 601 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 602 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 603 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 604 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 605 "CHARACTER SET": lambda self: self._parse_character_set(), 606 "CHECKSUM": lambda self: self._parse_checksum(), 607 "CLUSTER BY": lambda self: self._parse_cluster(), 608 "CLUSTERED": lambda self: self._parse_clustered_by(), 609 "COLLATE": lambda self: 
self._parse_property_assignment(exp.CollateProperty), 610 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 611 "COPY": lambda self: self._parse_copy_property(), 612 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 613 "DEFINER": lambda self: self._parse_definer(), 614 "DETERMINISTIC": lambda self: self.expression( 615 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 616 ), 617 "DISTKEY": lambda self: self._parse_distkey(), 618 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 619 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 620 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 621 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 622 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 623 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 624 "FREESPACE": lambda self: self._parse_freespace(), 625 "HEAP": lambda self: self.expression(exp.HeapProperty), 626 "IMMUTABLE": lambda self: self.expression( 627 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 628 ), 629 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 630 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 631 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 632 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 633 "LIKE": lambda self: self._parse_create_like(), 634 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 635 "LOCK": lambda self: self._parse_locking(), 636 "LOCKING": lambda self: self._parse_locking(), 637 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 638 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 639 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 640 "MULTISET": lambda self: 
self.expression(exp.SetProperty, multi=True), 641 "NO": lambda self: self._parse_no_property(), 642 "ON": lambda self: self._parse_on_property(), 643 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 644 "PARTITION BY": lambda self: self._parse_partitioned_by(), 645 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 646 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 647 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 648 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 649 "RETURNS": lambda self: self._parse_returns(), 650 "ROW": lambda self: self._parse_row(), 651 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 652 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 653 "SETTINGS": lambda self: self.expression( 654 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 655 ), 656 "SORTKEY": lambda self: self._parse_sortkey(), 657 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 658 "STABLE": lambda self: self.expression( 659 exp.StabilityProperty, this=exp.Literal.string("STABLE") 660 ), 661 "STORED": lambda self: self._parse_stored(), 662 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 663 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 664 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 665 "TO": lambda self: self._parse_to_table(), 666 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 667 "TTL": lambda self: self._parse_ttl(), 668 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 669 "VOLATILE": lambda self: self._parse_volatile_property(), 670 "WITH": lambda self: self._parse_with_property(), 671 } 672 673 CONSTRAINT_PARSERS = { 674 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 675 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 676 "CASESPECIFIC": lambda self: 
self.expression(exp.CaseSpecificColumnConstraint, not_=False), 677 "CHARACTER SET": lambda self: self.expression( 678 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 679 ), 680 "CHECK": lambda self: self.expression( 681 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 682 ), 683 "COLLATE": lambda self: self.expression( 684 exp.CollateColumnConstraint, this=self._parse_var() 685 ), 686 "COMMENT": lambda self: self.expression( 687 exp.CommentColumnConstraint, this=self._parse_string() 688 ), 689 "COMPRESS": lambda self: self._parse_compress(), 690 "DEFAULT": lambda self: self.expression( 691 exp.DefaultColumnConstraint, this=self._parse_bitwise() 692 ), 693 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 694 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 695 "FORMAT": lambda self: self.expression( 696 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 697 ), 698 "GENERATED": lambda self: self._parse_generated_as_identity(), 699 "IDENTITY": lambda self: self._parse_auto_increment(), 700 "INLINE": lambda self: self._parse_inline(), 701 "LIKE": lambda self: self._parse_create_like(), 702 "NOT": lambda self: self._parse_not_constraint(), 703 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 704 "ON": lambda self: self._match(TokenType.UPDATE) 705 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 706 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 707 "PRIMARY KEY": lambda self: self._parse_primary_key(), 708 "REFERENCES": lambda self: self._parse_references(match=False), 709 "TITLE": lambda self: self.expression( 710 exp.TitleColumnConstraint, this=self._parse_var_or_string() 711 ), 712 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 713 "UNIQUE": lambda self: self._parse_unique(), 714 "UPPERCASE": lambda self: 
self.expression(exp.UppercaseColumnConstraint), 715 } 716 717 ALTER_PARSERS = { 718 "ADD": lambda self: self._parse_alter_table_add(), 719 "ALTER": lambda self: self._parse_alter_table_alter(), 720 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 721 "DROP": lambda self: self._parse_alter_table_drop(), 722 "RENAME": lambda self: self._parse_alter_table_rename(), 723 } 724 725 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 726 727 NO_PAREN_FUNCTION_PARSERS = { 728 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 729 "CASE": lambda self: self._parse_case(), 730 "IF": lambda self: self._parse_if(), 731 "NEXT": lambda self: self._parse_next_value_for(), 732 } 733 734 INVALID_FUNC_NAME_TOKENS = { 735 TokenType.IDENTIFIER, 736 TokenType.STRING, 737 } 738 739 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 740 741 FUNCTION_PARSERS = { 742 "ANY_VALUE": lambda self: self._parse_any_value(), 743 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 744 "CONCAT": lambda self: self._parse_concat(), 745 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 746 "DECODE": lambda self: self._parse_decode(), 747 "EXTRACT": lambda self: self._parse_extract(), 748 "JSON_OBJECT": lambda self: self._parse_json_object(), 749 "LOG": lambda self: self._parse_logarithm(), 750 "MATCH": lambda self: self._parse_match_against(), 751 "OPENJSON": lambda self: self._parse_open_json(), 752 "POSITION": lambda self: self._parse_position(), 753 "SAFE_CAST": lambda self: self._parse_cast(False), 754 "STRING_AGG": lambda self: self._parse_string_agg(), 755 "SUBSTRING": lambda self: self._parse_substring(), 756 "TRIM": lambda self: self._parse_trim(), 757 "TRY_CAST": lambda self: self._parse_cast(False), 758 "TRY_CONVERT": lambda self: self._parse_convert(False), 759 } 760 761 QUERY_MODIFIER_PARSERS = { 762 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 763 
TokenType.WHERE: lambda self: ("where", self._parse_where()), 764 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 765 TokenType.HAVING: lambda self: ("having", self._parse_having()), 766 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 767 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 768 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 769 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 770 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 771 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 772 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 773 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 774 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 775 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 776 TokenType.CLUSTER_BY: lambda self: ( 777 "cluster", 778 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 779 ), 780 TokenType.DISTRIBUTE_BY: lambda self: ( 781 "distribute", 782 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 783 ), 784 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 785 } 786 787 SET_PARSERS = { 788 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 789 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 790 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 791 "TRANSACTION": lambda self: self._parse_set_transaction(), 792 } 793 794 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 795 796 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 797 798 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 799 800 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 801 802 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 803 804 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", 
"EXCLUSIVE"} 805 TRANSACTION_CHARACTERISTICS = { 806 "ISOLATION LEVEL REPEATABLE READ", 807 "ISOLATION LEVEL READ COMMITTED", 808 "ISOLATION LEVEL READ UNCOMMITTED", 809 "ISOLATION LEVEL SERIALIZABLE", 810 "READ WRITE", 811 "READ ONLY", 812 } 813 814 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 815 816 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 817 818 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 819 820 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 821 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 822 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 823 824 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 825 826 DISTINCT_TOKENS = {TokenType.DISTINCT} 827 828 STRICT_CAST = True 829 830 # A NULL arg in CONCAT yields NULL by default 831 CONCAT_NULL_OUTPUTS_STRING = False 832 833 PREFIXED_PIVOT_COLUMNS = False 834 IDENTIFY_PIVOT_STRINGS = False 835 836 LOG_BASE_FIRST = True 837 LOG_DEFAULTS_TO_LN = False 838 839 SUPPORTS_USER_DEFINED_TYPES = True 840 841 __slots__ = ( 842 "error_level", 843 "error_message_context", 844 "max_errors", 845 "sql", 846 "errors", 847 "_tokens", 848 "_index", 849 "_curr", 850 "_next", 851 "_prev", 852 "_prev_comments", 853 "_tokenizer", 854 ) 855 856 # Autofilled 857 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 858 INDEX_OFFSET: int = 0 859 UNNEST_COLUMN_ONLY: bool = False 860 ALIAS_POST_TABLESAMPLE: bool = False 861 STRICT_STRING_CONCAT = False 862 NORMALIZE_FUNCTIONS = "upper" 863 NULL_ORDERING: str = "nulls_are_small" 864 SHOW_TRIE: t.Dict = {} 865 SET_TRIE: t.Dict = {} 866 FORMAT_MAPPING: t.Dict[str, str] = {} 867 FORMAT_TRIE: t.Dict = {} 868 TIME_MAPPING: t.Dict[str, str] = {} 869 TIME_TRIE: t.Dict = {} 870 871 def __init__( 872 self, 873 error_level: t.Optional[ErrorLevel] = None, 874 error_message_context: int = 100, 875 max_errors: int = 3, 876 ): 877 self.error_level = error_level or ErrorLevel.IMMEDIATE 878 
self.error_message_context = error_message_context 879 self.max_errors = max_errors 880 self._tokenizer = self.TOKENIZER_CLASS() 881 self.reset() 882 883 def reset(self): 884 self.sql = "" 885 self.errors = [] 886 self._tokens = [] 887 self._index = 0 888 self._curr = None 889 self._next = None 890 self._prev = None 891 self._prev_comments = None 892 893 def parse( 894 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 895 ) -> t.List[t.Optional[exp.Expression]]: 896 """ 897 Parses a list of tokens and returns a list of syntax trees, one tree 898 per parsed SQL statement. 899 900 Args: 901 raw_tokens: The list of tokens. 902 sql: The original SQL string, used to produce helpful debug messages. 903 904 Returns: 905 The list of the produced syntax trees. 906 """ 907 return self._parse( 908 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 909 ) 910 911 def parse_into( 912 self, 913 expression_types: exp.IntoType, 914 raw_tokens: t.List[Token], 915 sql: t.Optional[str] = None, 916 ) -> t.List[t.Optional[exp.Expression]]: 917 """ 918 Parses a list of tokens into a given Expression type. If a collection of Expression 919 types is given instead, this method will try to parse the token list into each one 920 of them, stopping at the first for which the parsing succeeds. 921 922 Args: 923 expression_types: The expression type(s) to try and parse the token list into. 924 raw_tokens: The list of tokens. 925 sql: The original SQL string, used to produce helpful debug messages. 926 927 Returns: 928 The target Expression. 
929 """ 930 errors = [] 931 for expression_type in ensure_list(expression_types): 932 parser = self.EXPRESSION_PARSERS.get(expression_type) 933 if not parser: 934 raise TypeError(f"No parser registered for {expression_type}") 935 936 try: 937 return self._parse(parser, raw_tokens, sql) 938 except ParseError as e: 939 e.errors[0]["into_expression"] = expression_type 940 errors.append(e) 941 942 raise ParseError( 943 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 944 errors=merge_errors(errors), 945 ) from errors[-1] 946 947 def _parse( 948 self, 949 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 950 raw_tokens: t.List[Token], 951 sql: t.Optional[str] = None, 952 ) -> t.List[t.Optional[exp.Expression]]: 953 self.reset() 954 self.sql = sql or "" 955 956 total = len(raw_tokens) 957 chunks: t.List[t.List[Token]] = [[]] 958 959 for i, token in enumerate(raw_tokens): 960 if token.token_type == TokenType.SEMICOLON: 961 if i < total - 1: 962 chunks.append([]) 963 else: 964 chunks[-1].append(token) 965 966 expressions = [] 967 968 for tokens in chunks: 969 self._index = -1 970 self._tokens = tokens 971 self._advance() 972 973 expressions.append(parse_method(self)) 974 975 if self._index < len(self._tokens): 976 self.raise_error("Invalid expression / Unexpected token") 977 978 self.check_errors() 979 980 return expressions 981 982 def check_errors(self) -> None: 983 """Logs or raises any found errors, depending on the chosen error level setting.""" 984 if self.error_level == ErrorLevel.WARN: 985 for error in self.errors: 986 logger.error(str(error)) 987 elif self.error_level == ErrorLevel.RAISE and self.errors: 988 raise ParseError( 989 concat_messages(self.errors, self.max_errors), 990 errors=merge_errors(self.errors), 991 ) 992 993 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 994 """ 995 Appends an error in the list of recorded errors or raises it, depending on the chosen 996 error level setting. 
        """
        # Fall back through the cursor tokens so there is always something to point at.
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        # Slice up to `error_message_context` characters on either side of the
        # offending token; the token itself is underlined via ANSI escape codes.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach the explicit comments if given, otherwise pick up any comments
        # pending on the previously consumed token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Transfers the previous token's comments onto `expression`, consuming them."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL text spanned by the two tokens, inclusive."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Moves the token cursor forward (or backward, for negative `times`)."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Rewinds (or advances) the cursor to the absolute position `index`."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Wraps the previously matched keyword plus the rest of the statement as an opaque Command."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a COMMENT ON <kind> <target> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind -- fall back to an opaque command.
            return self._parse_as_command(start)

        # The target is parsed differently depending on what kind of object it names.
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses a TO <table> clause into a ToTableProperty."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty,
this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause (expressions, optional WHERE/GROUP BY/SET)."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A single TTL entry: an expression optionally followed by an action.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses one full SQL statement, dispatching on the leading token."""
        if self._curr is None:
            return None

        # Registered statement parsers (SELECT, CREATE, DROP, ...) take priority.
        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        # Commands are passed through mostly unparsed.
        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Otherwise treat it as a bare expression or an implicit SELECT.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses a DROP statement; unknown object kinds fall back to an opaque Command."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS, returning a truthy value only when fully matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE [OR REPLACE] statement for any creatable object kind."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION is matched below.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates property clauses found at different syntactic locations
            # into a single Properties node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                # The body is an opaque command (e.g. an external language routine).
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            # CLONE <table> [{AT | BEFORE} (<kind> => <expr>)]
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a property that appears before the object name (pre-name modifiers + parser dispatch)."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Each flag below consumes its keyword if present; order matters since
        # every match advances the token cursor.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only pass the modifiers that were actually present.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser doesn't accept these modifiers -> report, don't crash.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single property clause, returning None when no property matches."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return
self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property assignment (identifier or string key).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses a STORED AS clause, including INPUTFORMAT/OUTPUTFORMAT pairs."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        """Parses `[= | AS] <field>` and wraps the field in the given property class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parses consecutive property clauses into a Properties node, or None if there are none.

        Args:
            before: When truthy, use the pre-name property grammar (_parse_property_before).
        """
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties,
expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parses a FALLBACK [PROTECTION] property."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguates VOLATILE: a table property vs. a function stability marker."""
        if self._index >= 2:
            # Inspect the token two positions back to see what VOLATILE modifies.
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parses the clause following WITH: a wrapped property list or a specific WITH-property."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parses DEFINER = user@host; returns None if either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parses WITH JOURNAL [TABLE] [=] <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) ->
exp.LogProperty:
        """Parses a [NO] LOG property (signature continues from the line above)."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Wraps previously matched journal modifiers into a JournalProperty."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses CHECKSUM [=] {ON | OFF} [DEFAULT]; `on` stays None if neither matched."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        """Parses a CLUSTER BY list of ordered expressions."""
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parses CLUSTERED BY (<cols>) [SORTED BY (<ordered>)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parses COPY GRANTS; rewinds and returns None if GRANTS doesn't follow."""
        if not self._match_text_seq("GRANTS"):
            # COPY was consumed by the dispatcher -- give it back.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parses FREESPACE [=] <number> [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parses MERGEBLOCKRATIO, either as `= <number> [PERCENT]` or as a bare flag form."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parses DATABLOCKSIZE [=] <number> [BYTES | KBYTES | KILOBYTES]."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parses BLOCKCOMPRESSION [=] {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP (<schema>)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parses WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR {ALL | INSERT | NONE}]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parses a LOCKING clause: target kind/name, FOR|IN, lock type and OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named targets (not ROW) carry a table name.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parses PARTITION BY <exprs>; returns an empty list when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parses PARTITIONED BY with either a column schema or a bracketed field."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parses WITH [NO] DATA [AND [NO] STATISTICS]; `statistics` stays None if absent."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """Parses NO PRIMARY INDEX; returns None when PRIMARY INDEX doesn't follow."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parses ON COMMIT {PRESERVE | DELETE} ROWS."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parses DISTKEY (<identifier>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parses LIKE <table> [{INCLUDING | EXCLUDING} <option>]...; None on a malformed option."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parses [COMPOUND] SORTKEY (<identifiers>)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parses [DEFAULT] CHARACTER SET [=] <value>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parses a RETURNS clause: a scalar type, TABLE<...>, or a TABLE schema."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<col type, ...> (angle-bracketed struct syntax).
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value =
self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parses DESCRIBE [<kind>] <table>."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        """Parses an INSERT statement, including the INSERT ... DIRECTORY variant."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT [OVERWRITE] [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE ...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear before or after the inserted data.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parses ON CONFLICT ... / ON DUPLICATE KEY ... conflict-resolution clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: a named constraint or a list of key values.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parses RETURNING <exprs> [INTO <target>]; None when RETURNING is absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses the FORMAT part of a ROW FORMAT clause (ROW already consumed)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses ROW FORMAT {SERDE '<name>' [WITH SERDEPROPERTIES (...)] | DELIMITED ...}.

        Args:
            match_row: When True, require and consume the leading ROW FORMAT tokens.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional and independently matched.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if
self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parses LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; other LOAD forms become a Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parses a DELETE statement, including multi-table targets before FROM."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after the WHERE clause.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parses an UPDATE statement (target, SET list, FROM/WHERE/RETURNING/LIMIT)."""
        comments = self._prev_comments
        this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parses UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parses CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            # Stored as a flat [key, value] pair.
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parses PARTITION (<exprs>); None when PARTITION is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parses a single VALUES row: a parenthesized tuple or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
return self.expression(exp.Tuple, expressions=expressions) 1961 1962 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1963 # https://prestodb.io/docs/current/sql/values.html 1964 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1965 1966 def _parse_projections(self) -> t.List[exp.Expression]: 1967 return self._parse_expressions() 1968 1969 def _parse_select( 1970 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1971 ) -> t.Optional[exp.Expression]: 1972 cte = self._parse_with() 1973 if cte: 1974 this = self._parse_statement() 1975 1976 if not this: 1977 self.raise_error("Failed to parse any statement following CTE") 1978 return cte 1979 1980 if "with" in this.arg_types: 1981 this.set("with", cte) 1982 else: 1983 self.raise_error(f"{this.key} does not support CTE") 1984 this = cte 1985 elif self._match(TokenType.SELECT): 1986 comments = self._prev_comments 1987 1988 hint = self._parse_hint() 1989 all_ = self._match(TokenType.ALL) 1990 distinct = self._match_set(self.DISTINCT_TOKENS) 1991 1992 kind = ( 1993 self._match(TokenType.ALIAS) 1994 and self._match_texts(("STRUCT", "VALUE")) 1995 and self._prev.text 1996 ) 1997 1998 if distinct: 1999 distinct = self.expression( 2000 exp.Distinct, 2001 on=self._parse_value() if self._match(TokenType.ON) else None, 2002 ) 2003 2004 if all_ and distinct: 2005 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2006 2007 limit = self._parse_limit(top=True) 2008 projections = self._parse_projections() 2009 2010 this = self.expression( 2011 exp.Select, 2012 kind=kind, 2013 hint=hint, 2014 distinct=distinct, 2015 expressions=projections, 2016 limit=limit, 2017 ) 2018 this.comments = comments 2019 2020 into = self._parse_into() 2021 if into: 2022 this.set("into", into) 2023 2024 from_ = self._parse_from() 2025 if from_: 2026 this.set("from", from_) 2027 2028 this = self._parse_query_modifiers(this) 2029 elif (table or nested) and 
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated list of CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs (e.g. "WITH a AS (...), WITH b AS (...)")
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: <alias> [(<cols>)] AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <name> [(<column>, ...)]; returns None if neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If nothing parsed inside the parens, rewind: the L_PAREN wasn't a column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)
    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap an expression in a Subquery node, parsing trailing pivots and alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing modifiers (joins, laterals, WHERE/GROUP/LIMIT, ...) to a query node."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # "LIMIT x, y" style: hoist the embedded offset into its own node.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment block: /*+ hint(...) ... */."""
        if self._match(TokenType.HINT):
            hints = []
            # Keep consuming comma-separated hint-function lists until none parse.
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None
self.raise_error("Expected */ after HINT") 2152 2153 return self.expression(exp.Hint, expressions=hints) 2154 2155 return None 2156 2157 def _parse_into(self) -> t.Optional[exp.Into]: 2158 if not self._match(TokenType.INTO): 2159 return None 2160 2161 temp = self._match(TokenType.TEMPORARY) 2162 unlogged = self._match_text_seq("UNLOGGED") 2163 self._match(TokenType.TABLE) 2164 2165 return self.expression( 2166 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2167 ) 2168 2169 def _parse_from( 2170 self, joins: bool = False, skip_from_token: bool = False 2171 ) -> t.Optional[exp.From]: 2172 if not skip_from_token and not self._match(TokenType.FROM): 2173 return None 2174 2175 return self.expression( 2176 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2177 ) 2178 2179 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2180 if not self._match(TokenType.MATCH_RECOGNIZE): 2181 return None 2182 2183 self._match_l_paren() 2184 2185 partition = self._parse_partition_by() 2186 order = self._parse_order() 2187 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2188 2189 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2190 rows = exp.var("ONE ROW PER MATCH") 2191 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2192 text = "ALL ROWS PER MATCH" 2193 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2194 text += f" SHOW EMPTY MATCHES" 2195 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2196 text += f" OMIT EMPTY MATCHES" 2197 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2198 text += f" WITH UNMATCHED ROWS" 2199 rows = exp.var(text) 2200 else: 2201 rows = None 2202 2203 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2204 text = "AFTER MATCH SKIP" 2205 if self._match_text_seq("PAST", "LAST", "ROW"): 2206 text += f" PAST LAST ROW" 2207 elif self._match_text_seq("TO", "NEXT", "ROW"): 2208 text += f" TO NEXT ROW" 2209 elif 
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY table expressions."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            # OUTER APPLY maps to an outer lateral; CROSS APPLY does not.
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: fall back to a (possibly dotted) function call or identifier,
            # e.g. LATERAL UNNEST(...), LATERAL flatten(...), LATERAL schema.fn(...).
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            # LATERAL VIEW (Hive/Spark): "<fn> <table_alias> [AS col1, col2, ...]"
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume optional join method/side/kind tokens, returning the matched tokens."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join clause (including comma joins and CROSS/OUTER APPLY).

        Returns None if the upcoming tokens do not form a join.
        """
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The method/side/kind tokens weren't part of a join after all: rewind.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT-side join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Support nested joins whose ON/USING follows the inner join list,
            # e.g. "a JOIN b JOIN c ON ... ON ...". Rewind if that's not the case.
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition or reference.

        Args:
            index: a pre-parsed index name; when given, only the ON <table> part
                (and the rest of the definition) is parsed here.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")  # Teradata access-module-processor index

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )
    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL USE/FORCE/IGNORE INDEX hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (identifier, string, or function)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name: [catalog.][db.]table[.more...]."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-valued expression: lateral, unnest, VALUES, subquery or table name.

        Args:
            schema: parse the table as a schema definition (column list allowed).
            joins: also consume trailing joins.
            alias_tokens: token types permitted as a table alias.
            parse_bracket: allow a bracket expression in table position.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect-dependent: some dialects put TABLESAMPLE before the alias.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # TableSample wraps the table node it samples.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] name]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            # In column-only dialects (e.g. BigQuery) the alias names the column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )
2539 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2540 2541 alias = self._parse_table_alias() if with_alias else None 2542 2543 if alias and self.UNNEST_COLUMN_ONLY: 2544 if alias.args.get("columns"): 2545 self.raise_error("Unexpected extra column alias in unnest.") 2546 2547 alias.set("columns", [alias.this]) 2548 alias.set("this", None) 2549 2550 offset = None 2551 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2552 self._match(TokenType.ALIAS) 2553 offset = self._parse_id_var() or exp.to_identifier("offset") 2554 2555 return self.expression( 2556 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2557 ) 2558 2559 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2560 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2561 if not is_derived and not self._match(TokenType.VALUES): 2562 return None 2563 2564 expressions = self._parse_csv(self._parse_value) 2565 alias = self._parse_table_alias() 2566 2567 if is_derived: 2568 self._match_r_paren() 2569 2570 return self.expression( 2571 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2572 ) 2573 2574 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2575 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2576 as_modifier and self._match_text_seq("USING", "SAMPLE") 2577 ): 2578 return None 2579 2580 bucket_numerator = None 2581 bucket_denominator = None 2582 bucket_field = None 2583 percent = None 2584 rows = None 2585 size = None 2586 seed = None 2587 2588 kind = ( 2589 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2590 ) 2591 method = self._parse_var(tokens=(TokenType.ROW,)) 2592 2593 self._match(TokenType.L_PAREN) 2594 2595 num = self._parse_number() 2596 2597 if self._match_text_seq("BUCKET"): 2598 bucket_numerator = self._parse_number() 2599 self._match_text_seq("OUT", "OF") 2600 bucket_denominator = 
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Parse consecutive PIVOT/UNPIVOT clauses; None if there are none."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        """Parse consecutive join clauses; None if there are none."""
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT statement: PIVOT t ON ... USING ... GROUP BY ..."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard PIVOT/UNPIVOT clause; returns None and rewinds if absent."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # Only the last PIVOT/UNPIVOT in a chain can carry the alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names the pivot will produce.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
unpivot = True 2662 2663 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2664 if self._match_text_seq("INCLUDE", "NULLS"): 2665 include_nulls = True 2666 elif self._match_text_seq("EXCLUDE", "NULLS"): 2667 include_nulls = False 2668 else: 2669 return None 2670 2671 expressions = [] 2672 field = None 2673 2674 if not self._match(TokenType.L_PAREN): 2675 self._retreat(index) 2676 return None 2677 2678 if unpivot: 2679 expressions = self._parse_csv(self._parse_column) 2680 else: 2681 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2682 2683 if not expressions: 2684 self.raise_error("Failed to parse PIVOT's aggregation list") 2685 2686 if not self._match(TokenType.FOR): 2687 self.raise_error("Expecting FOR") 2688 2689 value = self._parse_column() 2690 2691 if not self._match(TokenType.IN): 2692 self.raise_error("Expecting IN") 2693 2694 field = self._parse_in(value, alias=True) 2695 2696 self._match_r_paren() 2697 2698 pivot = self.expression( 2699 exp.Pivot, 2700 expressions=expressions, 2701 field=field, 2702 unpivot=unpivot, 2703 include_nulls=include_nulls, 2704 ) 2705 2706 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2707 pivot.set("alias", self._parse_table_alias()) 2708 2709 if not unpivot: 2710 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2711 2712 columns: t.List[exp.Expression] = [] 2713 for fld in pivot.args["field"].expressions: 2714 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2715 for name in names: 2716 if self.PREFIXED_PIVOT_COLUMNS: 2717 name = f"{name}_{field_name}" if name else field_name 2718 else: 2719 name = f"{field_name}_{name}" if name else field_name 2720 2721 columns.append(exp.to_identifier(name)) 2722 2723 pivot.set("columns", columns) 2724 2725 return pivot 2726 2727 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2728 return 
    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; None if WHERE is absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including ALL, GROUPING SETS, ROLLUP, CUBE and WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        # Expressions and grouping constructs can be interleaved, so loop until
        # no grouping construct was consumed in a pass.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # "WITH ROLLUP" stores True; "ROLLUP (...)" stores the column list.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...); None if absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()
    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; None if HAVING is absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause; None if QUALIFY is absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; returns `this` unchanged if ORDER BY is absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause (SORT BY, CLUSTER BY, ...) into `exp_class`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ORDER BY term with ASC/DESC and NULLS FIRST/LAST handling.

        When the null ordering is not explicit, `nulls_first` is inferred from the
        dialect's NULL_ORDERING setting so transpilation stays deterministic.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or FETCH; returns `this` unchanged if neither is present.

        Args:
            this: expression to attach the limit to.
            top: parse T-SQL style TOP instead of LIMIT.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style "LIMIT offset, count"
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET <n> [ROW|ROWS]; returns `this` unchanged if OFFSET is absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE.

        Each clause may carry an OF <tables> list and a NOWAIT / WAIT <n> /
        SKIP LOCKED wait policy.
        """
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait semantics: True = NOWAIT, expression = WAIT <n>, False = SKIP LOCKED
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains, recursing on the right-hand side."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default; explicit ALL turns it off.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full scalar expression, including an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains (lowest-precedence boolean operators)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators (<, >, <=, >=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range predicates (BETWEEN, IN, LIKE, ...), ISNULL/NOTNULL and IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE.

        Rewinds and returns None when what follows IS isn't a recognized operand.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: UNNEST(...), a (sub)query/list, or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A lone subquery is stored under "query"; anything else is a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse BETWEEN <low> AND <high>."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (used with LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing it to the INTERVAL '<n>' <unit> form."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # e.g. INTERVAL '5 day' -> value '5', unit day
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, ?? coalescing, and << / >> shift operators."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse TERM-precedence binary operators (e.g. + and -)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse FACTOR-precedence binary operators (e.g. * and /)."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse prefix unary operators, falling through to typed/AT TIME ZONE expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL, a shorthand cast like `DATE 'x'`, or a plain column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' becomes CAST('2020-01-01' AS DATE),
                # unless the dialect registered a dedicated literal parser.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name followed by a non-literal was not a cast; reparse
                # from scratch as a column reference.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse a single type parameter, e.g. the `10` in DECIMAL(10, 2)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY<...>, STRUCT<...>), parameterized
        (VARCHAR(10)), timezone-qualified timestamp, and INTERVAL span forms.

        Args:
            check_func: when True, back out if the parsed "type" is followed by a string,
                since it is then likely a function call (e.g. `DATE('...')`).
            schema: whether we are parsing within a schema definition context.
            allow_identifiers: whether a plain identifier may be re-tokenized as a type.
        """
        index = self._index

        # Teradata allows types qualified with the SYSUDTLIB database prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier's text: it may actually spell a type name.
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    return identifier
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Empty or unterminated parameter list: this was not a type.
                self._retreat(index)
                return None

            # TYPE(...) could still be a function call - resolved below via check_func.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax, e.g. ARRAY<INT> or STRUCT<a INT, b TEXT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                # Inline constructor values, e.g. ARRAY<INT>[1, 2].
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            if self._match_text_seq("YEAR", "TO", "MONTH"):
                span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()]
            elif self._match_text_seq("DAY", "TO", "SECOND"):
                span = [exp.IntervalDayToSecondSpan()]
            else:
                span = None

            unit = not span and self._parse_var()
            if not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string, so TYPE(...) was a function call; rewind fully.
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][] -> ARRAY<ARRAY<INT>>.
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member, e.g. `a INT` or `a: INT` (ClickHouse-style colon)."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AtTimeZone if an AT TIME ZONE clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, including dotted paths and bracket subscripts."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply postfix column operators (dots, ::casts, brackets) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast - the operand after :: must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # JSON-ish path operators take the next raw token as a literal.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualification chain: what was parsed as the column becomes
                # the table, the table becomes the db, etc.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, `.N` number, or parenthesized
        expression/tuple/subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimals, e.g. `.25` -> 0.25.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: name -> builder mapping to use; defaults to self.FUNCTIONS.
            anonymous: when True, always build an exp.Anonymous instead of a typed node.
            optional_parens: whether parenthesis-less functions (e.g. CURRENT_DATE)
                may be parsed here.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No "(" follows, so this can only be a paren-less builtin like CURRENT_USER.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume both the function name and the opening parenthesis.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Preserve the original casing of the function name.
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function signature."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally kind-qualified via a dot."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `x -> x + 1` or `(a, b) -> ...`), falling back to a
        DISTINCT list or a plain select/expression when no lambda arrow follows."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized lambda parameter list after all.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda operator found - rewind and parse as a regular expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a schema definition `(col def, ...)`; returns `this` unchanged when
        the parenthesized content is actually a nested SELECT."""
        index = self._index

        if not self.errors:
            # Speculatively try a nested select; any errors from the attempt are
            # discarded along with the consumed tokens.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a field definition inside a schema."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: optional type, then zero or more constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Computed column, e.g. `c AS (a + b) [PERSISTED] [NOT NULL]`.
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either a wrapped list or one value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY(...) column constraints.

        `this=False` marks BY DEFAULT, `this=True` marks ALWAYS.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>) - a computed expression, not IDENTITY.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the tail of a NOT constraint: NOT NULL or NOT CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (possibly named) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed ones fall through to the schema-level set."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint may bundle several constraint bodies.
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint, dispatching on its leading keyword."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint, with an optional column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key constraint options (ON DELETE/UPDATE actions, DEFERRABLE, etc.)
        as plain strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE or UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options
    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; `match=False` skips matching the keyword."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint including ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any single keyword (e.g. CASCADE / RESTRICT) is taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, either as a column constraint or a table-level key."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            # No column list follows, so this is a column-level constraint.
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracket suffix: subscript/slice `[...]` or DuckDB struct `{...}`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # `[:x]` - a slice with no lower bound.
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscript indices across dialects with different base offsets.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Brackets may chain, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Extend `this` into a slice if a colon follows (e.g. `x[a:b]`)."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this
    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression (both simple and searched forms)."""
        ifs = []
        default = None

        comments = self._prev_comments
        # For the simple form, this is the operand compared in each WHEN branch;
        # for the searched form it is None.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either as a function call IF(...) or the IF ... THEN ... END form."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]."""
        if not self._match_text_seq("VALUE", "FOR"):
            # Only NEXT was consumed by the caller - give it back.
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>); a comma separator is also accepted."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING MAX|MIN col])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the body of CAST(...) / TRY_CAST(...).

        Args:
            strict: True builds exp.Cast, False builds exp.TryCast.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two comma-separated args means a cast-to-string-type form.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # User-defined type name.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST(... AS DATE/TIMESTAMP FORMAT '...') is rewritten to a
                # STR_TO_DATE/STR_TO_TIME call with a normalized format string.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)
    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT arguments, optionally coalescing NULLs to '' per dialect."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Emulate dialects where CONCAT treats NULL arguments as empty strings.
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style calls, including the WITHIN GROUP form."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a dangling last arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search values may themselves be NULL at runtime, so the
                # branch matches on equality OR both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one JSON_OBJECT entry: [KEY] key [:|VALUE] value."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)
self._match_text_seq("WITHOUT", "UNIQUE"): 4072 unique_keys = False 4073 4074 self._match_text_seq("KEYS") 4075 4076 return_type = self._match_text_seq("RETURNING") and self._parse_type() 4077 format_json = self._match_text_seq("FORMAT", "JSON") 4078 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4079 4080 return self.expression( 4081 exp.JSONObject, 4082 expressions=expressions, 4083 null_handling=null_handling, 4084 unique_keys=unique_keys, 4085 return_type=return_type, 4086 format_json=format_json, 4087 encoding=encoding, 4088 ) 4089 4090 def _parse_logarithm(self) -> exp.Func: 4091 # Default argument order is base, expression 4092 args = self._parse_csv(self._parse_range) 4093 4094 if len(args) > 1: 4095 if not self.LOG_BASE_FIRST: 4096 args.reverse() 4097 return exp.Log.from_arg_list(args) 4098 4099 return self.expression( 4100 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4101 ) 4102 4103 def _parse_match_against(self) -> exp.MatchAgainst: 4104 expressions = self._parse_csv(self._parse_column) 4105 4106 self._match_text_seq(")", "AGAINST", "(") 4107 4108 this = self._parse_string() 4109 4110 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4111 modifier = "IN NATURAL LANGUAGE MODE" 4112 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4113 modifier = f"{modifier} WITH QUERY EXPANSION" 4114 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4115 modifier = "IN BOOLEAN MODE" 4116 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4117 modifier = "WITH QUERY EXPANSION" 4118 else: 4119 modifier = None 4120 4121 return self.expression( 4122 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4123 ) 4124 4125 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4126 def _parse_open_json(self) -> exp.OpenJSON: 4127 this = self._parse_bitwise() 4128 path = self._match(TokenType.COMMA) and self._parse_string() 4129 4130 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4131 this = self._parse_field(any_token=True) 4132 kind = self._parse_types() 4133 path = self._parse_string() 4134 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4135 4136 return self.expression( 4137 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4138 ) 4139 4140 expressions = None 4141 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4142 self._match_l_paren() 4143 expressions = self._parse_csv(_parse_open_json_column_def) 4144 4145 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4146 4147 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4148 args = self._parse_csv(self._parse_bitwise) 4149 4150 if self._match(TokenType.IN): 4151 return self.expression( 4152 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4153 ) 4154 4155 if haystack_first: 4156 haystack = seq_get(args, 0) 4157 needle = seq_get(args, 1) 4158 else: 4159 needle = seq_get(args, 0) 4160 haystack = seq_get(args, 1) 4161 4162 return self.expression( 4163 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4164 ) 4165 4166 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4167 args = self._parse_csv(self._parse_table) 4168 return exp.JoinHint(this=func_name.upper(), expressions=args) 4169 4170 def _parse_substring(self) -> exp.Substring: 4171 # Postgres supports the form: substring(string [from int] [for int]) 4172 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4173 4174 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4175 4176 if self._match(TokenType.FROM): 4177 args.append(self._parse_bitwise()) 4178 if self._match(TokenType.FOR): 4179 args.append(self._parse_bitwise()) 4180 4181 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4182 4183 def _parse_trim(self) -> exp.Trim: 4184 # 
https://www.w3resource.com/sql/character-functions/trim.php 4185 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4186 4187 position = None 4188 collation = None 4189 4190 if self._match_texts(self.TRIM_TYPES): 4191 position = self._prev.text.upper() 4192 4193 expression = self._parse_bitwise() 4194 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4195 this = self._parse_bitwise() 4196 else: 4197 this = expression 4198 expression = None 4199 4200 if self._match(TokenType.COLLATE): 4201 collation = self._parse_bitwise() 4202 4203 return self.expression( 4204 exp.Trim, this=this, position=position, expression=expression, collation=collation 4205 ) 4206 4207 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4208 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4209 4210 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4211 return self._parse_window(self._parse_id_var(), alias=True) 4212 4213 def _parse_respect_or_ignore_nulls( 4214 self, this: t.Optional[exp.Expression] 4215 ) -> t.Optional[exp.Expression]: 4216 if self._match_text_seq("IGNORE", "NULLS"): 4217 return self.expression(exp.IgnoreNulls, this=this) 4218 if self._match_text_seq("RESPECT", "NULLS"): 4219 return self.expression(exp.RespectNulls, this=this) 4220 return this 4221 4222 def _parse_window( 4223 self, this: t.Optional[exp.Expression], alias: bool = False 4224 ) -> t.Optional[exp.Expression]: 4225 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4226 self._match(TokenType.WHERE) 4227 this = self.expression( 4228 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4229 ) 4230 self._match_r_paren() 4231 4232 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
4233 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4234 if self._match_text_seq("WITHIN", "GROUP"): 4235 order = self._parse_wrapped(self._parse_order) 4236 this = self.expression(exp.WithinGroup, this=this, expression=order) 4237 4238 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4239 # Some dialects choose to implement and some do not. 4240 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4241 4242 # There is some code above in _parse_lambda that handles 4243 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4244 4245 # The below changes handle 4246 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4247 4248 # Oracle allows both formats 4249 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4250 # and Snowflake chose to do the same for familiarity 4251 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4252 this = self._parse_respect_or_ignore_nulls(this) 4253 4254 # bigquery select from window x AS (partition by ...) 
4255 if alias: 4256 over = None 4257 self._match(TokenType.ALIAS) 4258 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4259 return this 4260 else: 4261 over = self._prev.text.upper() 4262 4263 if not self._match(TokenType.L_PAREN): 4264 return self.expression( 4265 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4266 ) 4267 4268 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4269 4270 first = self._match(TokenType.FIRST) 4271 if self._match_text_seq("LAST"): 4272 first = False 4273 4274 partition, order = self._parse_partition_and_order() 4275 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4276 4277 if kind: 4278 self._match(TokenType.BETWEEN) 4279 start = self._parse_window_spec() 4280 self._match(TokenType.AND) 4281 end = self._parse_window_spec() 4282 4283 spec = self.expression( 4284 exp.WindowSpec, 4285 kind=kind, 4286 start=start["value"], 4287 start_side=start["side"], 4288 end=end["value"], 4289 end_side=end["side"], 4290 ) 4291 else: 4292 spec = None 4293 4294 self._match_r_paren() 4295 4296 window = self.expression( 4297 exp.Window, 4298 this=this, 4299 partition_by=partition, 4300 order=order, 4301 spec=spec, 4302 alias=window_alias, 4303 over=over, 4304 first=first, 4305 ) 4306 4307 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4308 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4309 return self._parse_window(window, alias=alias) 4310 4311 return window 4312 4313 def _parse_partition_and_order( 4314 self, 4315 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4316 return self._parse_partition_by(), self._parse_order() 4317 4318 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4319 self._match(TokenType.BETWEEN) 4320 4321 return { 4322 "value": ( 4323 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4324 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4325 or self._parse_bitwise() 4326 ), 4327 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4328 } 4329 4330 def _parse_alias( 4331 self, this: t.Optional[exp.Expression], explicit: bool = False 4332 ) -> t.Optional[exp.Expression]: 4333 any_token = self._match(TokenType.ALIAS) 4334 4335 if explicit and not any_token: 4336 return this 4337 4338 if self._match(TokenType.L_PAREN): 4339 aliases = self.expression( 4340 exp.Aliases, 4341 this=this, 4342 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4343 ) 4344 self._match_r_paren(aliases) 4345 return aliases 4346 4347 alias = self._parse_id_var(any_token) 4348 4349 if alias: 4350 return self.expression(exp.Alias, this=this, alias=alias) 4351 4352 return this 4353 4354 def _parse_id_var( 4355 self, 4356 any_token: bool = True, 4357 tokens: t.Optional[t.Collection[TokenType]] = None, 4358 ) -> t.Optional[exp.Expression]: 4359 identifier = self._parse_identifier() 4360 4361 if identifier: 4362 return identifier 4363 4364 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4365 quoted = self._prev.token_type == TokenType.STRING 4366 return exp.Identifier(this=self._prev.text, quoted=quoted) 4367 4368 return None 4369 4370 def _parse_string(self) -> t.Optional[exp.Expression]: 4371 if self._match(TokenType.STRING): 4372 return 
self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4373 return self._parse_placeholder() 4374 4375 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4376 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4377 4378 def _parse_number(self) -> t.Optional[exp.Expression]: 4379 if self._match(TokenType.NUMBER): 4380 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4381 return self._parse_placeholder() 4382 4383 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4384 if self._match(TokenType.IDENTIFIER): 4385 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4386 return self._parse_placeholder() 4387 4388 def _parse_var( 4389 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4390 ) -> t.Optional[exp.Expression]: 4391 if ( 4392 (any_token and self._advance_any()) 4393 or self._match(TokenType.VAR) 4394 or (self._match_set(tokens) if tokens else False) 4395 ): 4396 return self.expression(exp.Var, this=self._prev.text) 4397 return self._parse_placeholder() 4398 4399 def _advance_any(self) -> t.Optional[Token]: 4400 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4401 self._advance() 4402 return self._prev 4403 return None 4404 4405 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4406 return self._parse_var() or self._parse_string() 4407 4408 def _parse_null(self) -> t.Optional[exp.Expression]: 4409 if self._match(TokenType.NULL): 4410 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4411 return self._parse_placeholder() 4412 4413 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4414 if self._match(TokenType.TRUE): 4415 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4416 if self._match(TokenType.FALSE): 4417 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4418 return self._parse_placeholder() 4419 4420 def _parse_star(self) -> 
t.Optional[exp.Expression]: 4421 if self._match(TokenType.STAR): 4422 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4423 return self._parse_placeholder() 4424 4425 def _parse_parameter(self) -> exp.Parameter: 4426 wrapped = self._match(TokenType.L_BRACE) 4427 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4428 self._match(TokenType.R_BRACE) 4429 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4430 4431 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4432 if self._match_set(self.PLACEHOLDER_PARSERS): 4433 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4434 if placeholder: 4435 return placeholder 4436 self._advance(-1) 4437 return None 4438 4439 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4440 if not self._match(TokenType.EXCEPT): 4441 return None 4442 if self._match(TokenType.L_PAREN, advance=False): 4443 return self._parse_wrapped_csv(self._parse_column) 4444 return self._parse_csv(self._parse_column) 4445 4446 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4447 if not self._match(TokenType.REPLACE): 4448 return None 4449 if self._match(TokenType.L_PAREN, advance=False): 4450 return self._parse_wrapped_csv(self._parse_expression) 4451 return self._parse_expressions() 4452 4453 def _parse_csv( 4454 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4455 ) -> t.List[exp.Expression]: 4456 parse_result = parse_method() 4457 items = [parse_result] if parse_result is not None else [] 4458 4459 while self._match(sep): 4460 self._add_comments(parse_result) 4461 parse_result = parse_method() 4462 if parse_result is not None: 4463 items.append(parse_result) 4464 4465 return items 4466 4467 def _parse_tokens( 4468 self, parse_method: t.Callable, expressions: t.Dict 4469 ) -> t.Optional[exp.Expression]: 4470 this = parse_method() 4471 4472 while self._match_set(expressions): 4473 this = self.expression( 4474 
expressions[self._prev.token_type], 4475 this=this, 4476 comments=self._prev_comments, 4477 expression=parse_method(), 4478 ) 4479 4480 return this 4481 4482 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4483 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4484 4485 def _parse_wrapped_csv( 4486 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4487 ) -> t.List[exp.Expression]: 4488 return self._parse_wrapped( 4489 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4490 ) 4491 4492 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4493 wrapped = self._match(TokenType.L_PAREN) 4494 if not wrapped and not optional: 4495 self.raise_error("Expecting (") 4496 parse_result = parse_method() 4497 if wrapped: 4498 self._match_r_paren() 4499 return parse_result 4500 4501 def _parse_expressions(self) -> t.List[exp.Expression]: 4502 return self._parse_csv(self._parse_expression) 4503 4504 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4505 return self._parse_select() or self._parse_set_operations( 4506 self._parse_expression() if alias else self._parse_conjunction() 4507 ) 4508 4509 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4510 return self._parse_query_modifiers( 4511 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4512 ) 4513 4514 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4515 this = None 4516 if self._match_texts(self.TRANSACTION_KIND): 4517 this = self._prev.text 4518 4519 self._match_texts({"TRANSACTION", "WORK"}) 4520 4521 modes = [] 4522 while True: 4523 mode = [] 4524 while self._match(TokenType.VAR): 4525 mode.append(self._prev.text) 4526 4527 if mode: 4528 modes.append(" ".join(mode)) 4529 if not self._match(TokenType.COMMA): 4530 break 4531 4532 return self.expression(exp.Transaction, this=this, 
modes=modes) 4533 4534 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4535 chain = None 4536 savepoint = None 4537 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4538 4539 self._match_texts({"TRANSACTION", "WORK"}) 4540 4541 if self._match_text_seq("TO"): 4542 self._match_text_seq("SAVEPOINT") 4543 savepoint = self._parse_id_var() 4544 4545 if self._match(TokenType.AND): 4546 chain = not self._match_text_seq("NO") 4547 self._match_text_seq("CHAIN") 4548 4549 if is_rollback: 4550 return self.expression(exp.Rollback, savepoint=savepoint) 4551 4552 return self.expression(exp.Commit, chain=chain) 4553 4554 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4555 if not self._match_text_seq("ADD"): 4556 return None 4557 4558 self._match(TokenType.COLUMN) 4559 exists_column = self._parse_exists(not_=True) 4560 expression = self._parse_field_def() 4561 4562 if expression: 4563 expression.set("exists", exists_column) 4564 4565 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4566 if self._match_texts(("FIRST", "AFTER")): 4567 position = self._prev.text 4568 column_position = self.expression( 4569 exp.ColumnPosition, this=self._parse_column(), position=position 4570 ) 4571 expression.set("position", column_position) 4572 4573 return expression 4574 4575 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4576 drop = self._match(TokenType.DROP) and self._parse_drop() 4577 if drop and not isinstance(drop, exp.Command): 4578 drop.set("kind", drop.args.get("kind", "COLUMN")) 4579 return drop 4580 4581 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4582 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4583 return self.expression( 4584 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4585 ) 4586 4587 def _parse_add_constraint(self) -> exp.AddConstraint: 4588 this = None 4589 
kind = self._prev.token_type 4590 4591 if kind == TokenType.CONSTRAINT: 4592 this = self._parse_id_var() 4593 4594 if self._match_text_seq("CHECK"): 4595 expression = self._parse_wrapped(self._parse_conjunction) 4596 enforced = self._match_text_seq("ENFORCED") 4597 4598 return self.expression( 4599 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4600 ) 4601 4602 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4603 expression = self._parse_foreign_key() 4604 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4605 expression = self._parse_primary_key() 4606 else: 4607 expression = None 4608 4609 return self.expression(exp.AddConstraint, this=this, expression=expression) 4610 4611 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4612 index = self._index - 1 4613 4614 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4615 return self._parse_csv(self._parse_add_constraint) 4616 4617 self._retreat(index) 4618 return self._parse_csv(self._parse_add_column) 4619 4620 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4621 self._match(TokenType.COLUMN) 4622 column = self._parse_field(any_token=True) 4623 4624 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4625 return self.expression(exp.AlterColumn, this=column, drop=True) 4626 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4627 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4628 4629 self._match_text_seq("SET", "DATA") 4630 return self.expression( 4631 exp.AlterColumn, 4632 this=column, 4633 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4634 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4635 using=self._match(TokenType.USING) and self._parse_conjunction(), 4636 ) 4637 4638 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4639 index = self._index - 1 4640 4641 partition_exists = self._parse_exists() 4642 if self._match(TokenType.PARTITION, 
advance=False): 4643 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4644 4645 self._retreat(index) 4646 return self._parse_csv(self._parse_drop_column) 4647 4648 def _parse_alter_table_rename(self) -> exp.RenameTable: 4649 self._match_text_seq("TO") 4650 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4651 4652 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4653 start = self._prev 4654 4655 if not self._match(TokenType.TABLE): 4656 return self._parse_as_command(start) 4657 4658 exists = self._parse_exists() 4659 this = self._parse_table(schema=True) 4660 4661 if self._next: 4662 self._advance() 4663 4664 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4665 if parser: 4666 actions = ensure_list(parser(self)) 4667 4668 if not self._curr: 4669 return self.expression( 4670 exp.AlterTable, 4671 this=this, 4672 exists=exists, 4673 actions=actions, 4674 ) 4675 return self._parse_as_command(start) 4676 4677 def _parse_merge(self) -> exp.Merge: 4678 self._match(TokenType.INTO) 4679 target = self._parse_table() 4680 4681 if target and self._match(TokenType.ALIAS, advance=False): 4682 target.set("alias", self._parse_table_alias()) 4683 4684 self._match(TokenType.USING) 4685 using = self._parse_table() 4686 4687 self._match(TokenType.ON) 4688 on = self._parse_conjunction() 4689 4690 whens = [] 4691 while self._match(TokenType.WHEN): 4692 matched = not self._match(TokenType.NOT) 4693 self._match_text_seq("MATCHED") 4694 source = ( 4695 False 4696 if self._match_text_seq("BY", "TARGET") 4697 else self._match_text_seq("BY", "SOURCE") 4698 ) 4699 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4700 4701 self._match(TokenType.THEN) 4702 4703 if self._match(TokenType.INSERT): 4704 _this = self._parse_star() 4705 if _this: 4706 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4707 else: 4708 then = self.expression( 4709 
exp.Insert, 4710 this=self._parse_value(), 4711 expression=self._match(TokenType.VALUES) and self._parse_value(), 4712 ) 4713 elif self._match(TokenType.UPDATE): 4714 expressions = self._parse_star() 4715 if expressions: 4716 then = self.expression(exp.Update, expressions=expressions) 4717 else: 4718 then = self.expression( 4719 exp.Update, 4720 expressions=self._match(TokenType.SET) 4721 and self._parse_csv(self._parse_equality), 4722 ) 4723 elif self._match(TokenType.DELETE): 4724 then = self.expression(exp.Var, this=self._prev.text) 4725 else: 4726 then = None 4727 4728 whens.append( 4729 self.expression( 4730 exp.When, 4731 matched=matched, 4732 source=source, 4733 condition=condition, 4734 then=then, 4735 ) 4736 ) 4737 4738 return self.expression( 4739 exp.Merge, 4740 this=target, 4741 using=using, 4742 on=on, 4743 expressions=whens, 4744 ) 4745 4746 def _parse_show(self) -> t.Optional[exp.Expression]: 4747 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4748 if parser: 4749 return parser(self) 4750 self._advance() 4751 return self.expression(exp.Show, this=self._prev.text.upper()) 4752 4753 def _parse_set_item_assignment( 4754 self, kind: t.Optional[str] = None 4755 ) -> t.Optional[exp.Expression]: 4756 index = self._index 4757 4758 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4759 return self._parse_set_transaction(global_=kind == "GLOBAL") 4760 4761 left = self._parse_primary() or self._parse_id_var() 4762 4763 if not self._match_texts(("=", "TO")): 4764 self._retreat(index) 4765 return None 4766 4767 right = self._parse_statement() or self._parse_id_var() 4768 this = self.expression(exp.EQ, this=left, expression=right) 4769 4770 return self.expression(exp.SetItem, this=this, kind=kind) 4771 4772 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4773 self._match_text_seq("TRANSACTION") 4774 characteristics = self._parse_csv( 4775 lambda: 
self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4776 ) 4777 return self.expression( 4778 exp.SetItem, 4779 expressions=characteristics, 4780 kind="TRANSACTION", 4781 **{"global": global_}, # type: ignore 4782 ) 4783 4784 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4785 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4786 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4787 4788 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4789 index = self._index 4790 set_ = self.expression( 4791 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4792 ) 4793 4794 if self._curr: 4795 self._retreat(index) 4796 return self._parse_as_command(self._prev) 4797 4798 return set_ 4799 4800 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4801 for option in options: 4802 if self._match_text_seq(*option.split(" ")): 4803 return exp.var(option) 4804 return None 4805 4806 def _parse_as_command(self, start: Token) -> exp.Command: 4807 while self._curr: 4808 self._advance() 4809 text = self._find_sql(start, self._prev) 4810 size = len(start.text) 4811 return exp.Command(this=text[:size], expression=text[size:]) 4812 4813 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4814 settings = [] 4815 4816 self._match_l_paren() 4817 kind = self._parse_id_var() 4818 4819 if self._match(TokenType.L_PAREN): 4820 while True: 4821 key = self._parse_id_var() 4822 value = self._parse_primary() 4823 4824 if not key and value is None: 4825 break 4826 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4827 self._match(TokenType.R_PAREN) 4828 4829 self._match_r_paren() 4830 4831 return self.expression( 4832 exp.DictProperty, 4833 this=this, 4834 kind=kind.this if kind else None, 4835 settings=settings, 4836 ) 4837 4838 def _parse_dict_range(self, this: str) -> exp.DictRange: 4839 self._match_l_paren() 
4840 has_min = self._match_text_seq("MIN") 4841 if has_min: 4842 min = self._parse_var() or self._parse_primary() 4843 self._match_text_seq("MAX") 4844 max = self._parse_var() or self._parse_primary() 4845 else: 4846 max = self._parse_var() or self._parse_primary() 4847 min = exp.Literal.number(0) 4848 self._match_r_paren() 4849 return self.expression(exp.DictRange, this=this, min=min, max=max) 4850 4851 def _find_parser( 4852 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4853 ) -> t.Optional[t.Callable]: 4854 if not self._curr: 4855 return None 4856 4857 index = self._index 4858 this = [] 4859 while True: 4860 # The current token might be multiple words 4861 curr = self._curr.text.upper() 4862 key = curr.split(" ") 4863 this.append(curr) 4864 4865 self._advance() 4866 result, trie = in_trie(trie, key) 4867 if result == TrieResult.FAILED: 4868 break 4869 4870 if result == TrieResult.EXISTS: 4871 subparser = parsers[" ".join(this)] 4872 return subparser 4873 4874 self._retreat(index) 4875 return None 4876 4877 def _match(self, token_type, advance=True, expression=None): 4878 if not self._curr: 4879 return None 4880 4881 if self._curr.token_type == token_type: 4882 if advance: 4883 self._advance() 4884 self._add_comments(expression) 4885 return True 4886 4887 return None 4888 4889 def _match_set(self, types, advance=True): 4890 if not self._curr: 4891 return None 4892 4893 if self._curr.token_type in types: 4894 if advance: 4895 self._advance() 4896 return True 4897 4898 return None 4899 4900 def _match_pair(self, token_type_a, token_type_b, advance=True): 4901 if not self._curr or not self._next: 4902 return None 4903 4904 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4905 if advance: 4906 self._advance(2) 4907 return True 4908 4909 return None 4910 4911 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4912 if not self._match(TokenType.L_PAREN, expression=expression): 4913 
self.raise_error("Expecting (") 4914 4915 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4916 if not self._match(TokenType.R_PAREN, expression=expression): 4917 self.raise_error("Expecting )") 4918 4919 def _match_texts(self, texts, advance=True): 4920 if self._curr and self._curr.text.upper() in texts: 4921 if advance: 4922 self._advance() 4923 return True 4924 return False 4925 4926 def _match_text_seq(self, *texts, advance=True): 4927 index = self._index 4928 for text in texts: 4929 if self._curr and self._curr.text.upper() == text: 4930 self._advance() 4931 else: 4932 self._retreat(index) 4933 return False 4934 4935 if not advance: 4936 self._retreat(index) 4937 4938 return True 4939 4940 @t.overload 4941 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4942 ... 4943 4944 @t.overload 4945 def _replace_columns_with_dots( 4946 self, this: t.Optional[exp.Expression] 4947 ) -> t.Optional[exp.Expression]: 4948 ... 4949 4950 def _replace_columns_with_dots(self, this): 4951 if isinstance(this, exp.Dot): 4952 exp.replace_children(this, self._replace_columns_with_dots) 4953 elif isinstance(this, exp.Column): 4954 exp.replace_children(this, self._replace_columns_with_dots) 4955 table = this.args.get("table") 4956 this = ( 4957 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4958 ) 4959 4960 return this 4961 4962 def _replace_lambda( 4963 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4964 ) -> t.Optional[exp.Expression]: 4965 if not node: 4966 return node 4967 4968 for column in node.find_all(exp.Column): 4969 if column.parts[0].name in lambda_variables: 4970 dot_or_id = column.to_dot() if column.table else column.this 4971 parent = column.parent 4972 4973 while isinstance(parent, exp.Dot): 4974 if not isinstance(parent.parent, exp.Dot): 4975 parent.replace(dot_or_id) 4976 break 4977 parent = parent.parent 4978 else: 4979 if column is node: 4980 node = 
dot_or_id 4981 else: 4982 column.replace(dot_or_id) 4983 return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from a flat, alternating key/value argument list.

    A single star argument produces a StarMap; otherwise the even-indexed
    arguments become keys and the odd-indexed ones become values of a VarMap.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    position = 0
    while position < len(args):
        keys.append(args[position])
        values.append(args[position + 1])
        position += 2

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    # Maps SQL function names to builders. Every generated function class in
    # exp.ALL_FUNCTIONS is registered through its from_arg_list constructor;
    # the explicit entries below add or override custom argument handling.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # Note the swapped operand order: args[1] becomes `this`.
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # Cast to TEXT and keep the first 10 characters (the date part).
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may appear without trailing parentheses, e.g. CURRENT_DATE.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate rather than a
        # distinct datetime expression -- confirm this is intentional.
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    # Tokens that denote struct-like types.
    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    # Tokens that denote types which can wrap or contain other types.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    # Tokens that denote enum type names.
    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    # All tokens that can start a data type.
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    # Tokens introducing subquery predicates; both ANY and SOME map to exp.Any.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }
    # Keywords that cannot be used as (unquoted) identifiers.
    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    # Object kinds that live inside a database and can appear in CREATE/DROP.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    # All object kinds accepted after CREATE / DROP / COMMENT ON.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Tokens allowed as the unit variable of an INTERVAL expression.
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Tokens allowed as a table alias; join/clause keywords are excluded so
    # that e.g. "FROM t LEFT JOIN ..." does not parse LEFT as an alias.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    # Alias tokens valid in COMMENT ON targets; IS is excluded there.
    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    # Alias tokens valid in UPDATE; SET is excluded to not shadow the SET clause.
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may be followed by a parenthesized argument list (function call).
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary operator tables, one per precedence tier.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Lambda syntaxes: "args -> body" builds exp.Lambda, "name => value" a Kwarg.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Operators that can follow a column expression (casts, JSON extraction).
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Entry points used by parse_into to parse into a specific expression type.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Top-level statement dispatch, keyed by the statement's first token.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        # A bare FROM statement is treated as SELECT * FROM ...
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal / primary expression builders, keyed by token type.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    # Range/predicate operators (BETWEEN, IN, the LIKE family, ...).
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property parsers, keyed by the property's keyword text.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column constraint parsers, keyed by the constraint's keyword text.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action parsers.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema definition without a name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Functions parsed specially without a parenthesized argument list.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Tokens that cannot serve as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument lists need bespoke parsing (e.g. CAST(x AS type)).
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Query modifier parsers; each returns an (arg_name, parsed_node) pair.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    }

    # SET statement item parsers, keyed by scope keyword.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialects populate these.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Node types that can carry query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT,
        TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    # Dialect behavior flags; subclasses override these as needed.
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    SUPPORTS_USER_DEFINED_TYPES = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Clears all parsing state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this failure belongs to, then try the next.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Core driver: splits the token stream into per-statement chunks on
        # semicolons, then applies parse_method to each chunk.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't start a new (empty) chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The escape codes underline the offending span in terminal output.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach explicit comments, or fall back to any pending tokenizer comments.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Moves any buffered comments from the previous token onto the expression.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL spanned by the two tokens (inclusive).
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor forward and refreshes the _curr/_next/_prev token views.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds (or advances) the cursor to an absolute index.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the previous token plus the rest as an opaque Command.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON <kind> <target> IS '<text>'.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: treat the whole statement as a raw command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        # Parses the TO <table> property target.
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Statement entry point: dispatch on the first token, falling back to
        # raw commands and finally to a plain expression/select.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        # Parses DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <name> ...
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only on a full match.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Merges newly parsed properties into the accumulated set.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
properties = temp_props 1234 1235 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1236 this = self._parse_user_defined_function(kind=create_token.token_type) 1237 1238 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1239 extend_props(self._parse_properties()) 1240 1241 self._match(TokenType.ALIAS) 1242 1243 if self._match(TokenType.COMMAND): 1244 expression = self._parse_as_command(self._prev) 1245 else: 1246 begin = self._match(TokenType.BEGIN) 1247 return_ = self._match_text_seq("RETURN") 1248 expression = self._parse_statement() 1249 1250 if return_: 1251 expression = self.expression(exp.Return, this=expression) 1252 elif create_token.token_type == TokenType.INDEX: 1253 this = self._parse_index(index=self._parse_id_var()) 1254 elif create_token.token_type in self.DB_CREATABLES: 1255 table_parts = self._parse_table_parts(schema=True) 1256 1257 # exp.Properties.Location.POST_NAME 1258 self._match(TokenType.COMMA) 1259 extend_props(self._parse_properties(before=True)) 1260 1261 this = self._parse_schema(this=table_parts) 1262 1263 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1264 extend_props(self._parse_properties()) 1265 1266 self._match(TokenType.ALIAS) 1267 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1268 # exp.Properties.Location.POST_ALIAS 1269 extend_props(self._parse_properties()) 1270 1271 expression = self._parse_ddl_select() 1272 1273 if create_token.token_type == TokenType.TABLE: 1274 # exp.Properties.Location.POST_EXPRESSION 1275 extend_props(self._parse_properties()) 1276 1277 indexes = [] 1278 while True: 1279 index = self._parse_index() 1280 1281 # exp.Properties.Location.POST_INDEX 1282 extend_props(self._parse_properties()) 1283 1284 if not index: 1285 break 1286 else: 1287 self._match(TokenType.COMMA) 1288 indexes.append(index) 1289 elif create_token.token_type == TokenType.VIEW: 1290 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1291 
no_schema_binding = True 1292 1293 if self._match_text_seq("CLONE"): 1294 clone = self._parse_table(schema=True) 1295 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1296 clone_kind = ( 1297 self._match(TokenType.L_PAREN) 1298 and self._match_texts(self.CLONE_KINDS) 1299 and self._prev.text.upper() 1300 ) 1301 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1302 self._match(TokenType.R_PAREN) 1303 clone = self.expression( 1304 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1305 ) 1306 1307 return self.expression( 1308 exp.Create, 1309 comments=comments, 1310 this=this, 1311 kind=create_token.text, 1312 replace=replace, 1313 unique=unique, 1314 expression=expression, 1315 exists=exists, 1316 properties=properties, 1317 indexes=indexes, 1318 no_schema_binding=no_schema_binding, 1319 begin=begin, 1320 clone=clone, 1321 ) 1322 1323 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1324 # only used for teradata currently 1325 self._match(TokenType.COMMA) 1326 1327 kwargs = { 1328 "no": self._match_text_seq("NO"), 1329 "dual": self._match_text_seq("DUAL"), 1330 "before": self._match_text_seq("BEFORE"), 1331 "default": self._match_text_seq("DEFAULT"), 1332 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1333 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1334 "after": self._match_text_seq("AFTER"), 1335 "minimum": self._match_texts(("MIN", "MINIMUM")), 1336 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1337 } 1338 1339 if self._match_texts(self.PROPERTY_PARSERS): 1340 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1341 try: 1342 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1343 except TypeError: 1344 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1345 1346 return None 1347 1348 def _parse_property(self) -> t.Optional[exp.Expression]: 1349 if self._match_texts(self.PROPERTY_PARSERS): 1350 return 
self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1351 1352 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1353 return self._parse_character_set(default=True) 1354 1355 if self._match_text_seq("COMPOUND", "SORTKEY"): 1356 return self._parse_sortkey(compound=True) 1357 1358 if self._match_text_seq("SQL", "SECURITY"): 1359 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1360 1361 assignment = self._match_pair( 1362 TokenType.VAR, TokenType.EQ, advance=False 1363 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1364 1365 if assignment: 1366 key = self._parse_var_or_string() 1367 self._match(TokenType.EQ) 1368 return self.expression(exp.Property, this=key, value=self._parse_column()) 1369 1370 return None 1371 1372 def _parse_stored(self) -> exp.FileFormatProperty: 1373 self._match(TokenType.ALIAS) 1374 1375 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1376 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1377 1378 return self.expression( 1379 exp.FileFormatProperty, 1380 this=self.expression( 1381 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1382 ) 1383 if input_format or output_format 1384 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1385 ) 1386 1387 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1388 self._match(TokenType.EQ) 1389 self._match(TokenType.ALIAS) 1390 return self.expression(exp_class, this=self._parse_field()) 1391 1392 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1393 properties = [] 1394 while True: 1395 if before: 1396 prop = self._parse_property_before() 1397 else: 1398 prop = self._parse_property() 1399 1400 if not prop: 1401 break 1402 for p in ensure_list(prop): 1403 properties.append(p) 1404 1405 if properties: 1406 return self.expression(exp.Properties, 
expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        # FALLBACK [PROTECTION]; `no` is set by the caller when NO preceded it.
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # Disambiguates VOLATILE: if the token two positions back is in
        # PRE_VOLATILE_TOKENS it is a table property, otherwise it is treated
        # as a function stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        # WITH (...) property list, or one of the keyword forms:
        # WITH JOURNAL ..., WITH [NO] DATA, WITH ... ISOLATED LOADING.
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        # Nothing recognizable follows WITH; bail out rather than consuming
        # the end of the token stream.
        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        # DEFINER = user@host; the host may also be a bare `%` wildcard.
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        # WITH JOURNAL TABLE = <table> (JOURNAL already consumed by the caller).
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) ->
exp.LogProperty: 1464 return self.expression(exp.LogProperty, no=no) 1465 1466 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1467 return self.expression(exp.JournalProperty, **kwargs) 1468 1469 def _parse_checksum(self) -> exp.ChecksumProperty: 1470 self._match(TokenType.EQ) 1471 1472 on = None 1473 if self._match(TokenType.ON): 1474 on = True 1475 elif self._match_text_seq("OFF"): 1476 on = False 1477 1478 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1479 1480 def _parse_cluster(self) -> exp.Cluster: 1481 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1482 1483 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1484 self._match_text_seq("BY") 1485 1486 self._match_l_paren() 1487 expressions = self._parse_csv(self._parse_column) 1488 self._match_r_paren() 1489 1490 if self._match_text_seq("SORTED", "BY"): 1491 self._match_l_paren() 1492 sorted_by = self._parse_csv(self._parse_ordered) 1493 self._match_r_paren() 1494 else: 1495 sorted_by = None 1496 1497 self._match(TokenType.INTO) 1498 buckets = self._parse_number() 1499 self._match_text_seq("BUCKETS") 1500 1501 return self.expression( 1502 exp.ClusteredByProperty, 1503 expressions=expressions, 1504 sorted_by=sorted_by, 1505 buckets=buckets, 1506 ) 1507 1508 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1509 if not self._match_text_seq("GRANTS"): 1510 self._retreat(self._index - 1) 1511 return None 1512 1513 return self.expression(exp.CopyGrantsProperty) 1514 1515 def _parse_freespace(self) -> exp.FreespaceProperty: 1516 self._match(TokenType.EQ) 1517 return self.expression( 1518 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1519 ) 1520 1521 def _parse_mergeblockratio( 1522 self, no: bool = False, default: bool = False 1523 ) -> exp.MergeBlockRatioProperty: 1524 if self._match(TokenType.EQ): 1525 return self.expression( 1526 
exp.MergeBlockRatioProperty,
            this=self._parse_number(),
            percent=self._match(TokenType.PERCENT),
        )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        # DATABLOCKSIZE [= <size> [BYTES | KBYTES | KILOBYTES]]; the
        # default/minimum/maximum flags are supplied by the caller, which has
        # already consumed those leading keywords.
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # BLOCKCOMPRESSION = {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP (<schema>)]
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        # [NO] [CONCURRENT] ISOLATED LOADING [FOR {ALL | INSERT | NONE}]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
1594 elif self._match(TokenType.VIEW): 1595 kind = "VIEW" 1596 elif self._match(TokenType.ROW): 1597 kind = "ROW" 1598 elif self._match_text_seq("DATABASE"): 1599 kind = "DATABASE" 1600 else: 1601 kind = None 1602 1603 if kind in ("DATABASE", "TABLE", "VIEW"): 1604 this = self._parse_table_parts() 1605 else: 1606 this = None 1607 1608 if self._match(TokenType.FOR): 1609 for_or_in = "FOR" 1610 elif self._match(TokenType.IN): 1611 for_or_in = "IN" 1612 else: 1613 for_or_in = None 1614 1615 if self._match_text_seq("ACCESS"): 1616 lock_type = "ACCESS" 1617 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1618 lock_type = "EXCLUSIVE" 1619 elif self._match_text_seq("SHARE"): 1620 lock_type = "SHARE" 1621 elif self._match_text_seq("READ"): 1622 lock_type = "READ" 1623 elif self._match_text_seq("WRITE"): 1624 lock_type = "WRITE" 1625 elif self._match_text_seq("CHECKSUM"): 1626 lock_type = "CHECKSUM" 1627 else: 1628 lock_type = None 1629 1630 override = self._match_text_seq("OVERRIDE") 1631 1632 return self.expression( 1633 exp.LockingProperty, 1634 this=this, 1635 kind=kind, 1636 for_or_in=for_or_in, 1637 lock_type=lock_type, 1638 override=override, 1639 ) 1640 1641 def _parse_partition_by(self) -> t.List[exp.Expression]: 1642 if self._match(TokenType.PARTITION_BY): 1643 return self._parse_csv(self._parse_conjunction) 1644 return [] 1645 1646 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1647 self._match(TokenType.EQ) 1648 return self.expression( 1649 exp.PartitionedByProperty, 1650 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1651 ) 1652 1653 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1654 if self._match_text_seq("AND", "STATISTICS"): 1655 statistics = True 1656 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1657 statistics = False 1658 else: 1659 statistics = None 1660 1661 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1662 1663 def _parse_no_property(self) -> 
t.Optional[exp.NoPrimaryIndexProperty]:
        # PRIMARY INDEX suffix of a NO PRIMARY INDEX property (the NO token
        # was matched by the caller, per this method's name).
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        # ON COMMIT {PRESERVE | DELETE} ROWS (the ON was matched by the caller).
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        # DISTKEY(<identifier>)
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        # LIKE <table> [{INCLUDING | EXCLUDING} <option>]...
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                # INCLUDING/EXCLUDING without an option name is malformed;
                # abandon the LIKE property altogether.
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        # [COMPOUND] SORTKEY(<identifiers>)
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        # [DEFAULT] CHARACTER SET [=] <value>
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # RETURNS <type>, RETURNS TABLE <schema>, or the angle-bracket form
        # RETURNS TABLE <t1, t2, ...> whose fields parse as struct types.
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value =
self._parse_schema(exp.var("TABLE")) 1721 else: 1722 value = self._parse_types() 1723 1724 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1725 1726 def _parse_describe(self) -> exp.Describe: 1727 kind = self._match_set(self.CREATABLES) and self._prev.text 1728 this = self._parse_table() 1729 return self.expression(exp.Describe, this=this, kind=kind) 1730 1731 def _parse_insert(self) -> exp.Insert: 1732 comments = ensure_list(self._prev_comments) 1733 overwrite = self._match(TokenType.OVERWRITE) 1734 ignore = self._match(TokenType.IGNORE) 1735 local = self._match_text_seq("LOCAL") 1736 alternative = None 1737 1738 if self._match_text_seq("DIRECTORY"): 1739 this: t.Optional[exp.Expression] = self.expression( 1740 exp.Directory, 1741 this=self._parse_var_or_string(), 1742 local=local, 1743 row_format=self._parse_row_format(match_row=True), 1744 ) 1745 else: 1746 if self._match(TokenType.OR): 1747 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1748 1749 self._match(TokenType.INTO) 1750 comments += ensure_list(self._prev_comments) 1751 self._match(TokenType.TABLE) 1752 this = self._parse_table(schema=True) 1753 1754 returning = self._parse_returning() 1755 1756 return self.expression( 1757 exp.Insert, 1758 comments=comments, 1759 this=this, 1760 exists=self._parse_exists(), 1761 partition=self._parse_partition(), 1762 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1763 and self._parse_conjunction(), 1764 expression=self._parse_ddl_select(), 1765 conflict=self._parse_on_conflict(), 1766 returning=returning or self._parse_returning(), 1767 overwrite=overwrite, 1768 alternative=alternative, 1769 ignore=ignore, 1770 ) 1771 1772 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1773 conflict = self._match_text_seq("ON", "CONFLICT") 1774 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1775 1776 if not conflict and not duplicate: 1777 return None 1778 1779 nothing = None 1780 
expressions = None 1781 key = None 1782 constraint = None 1783 1784 if conflict: 1785 if self._match_text_seq("ON", "CONSTRAINT"): 1786 constraint = self._parse_id_var() 1787 else: 1788 key = self._parse_csv(self._parse_value) 1789 1790 self._match_text_seq("DO") 1791 if self._match_text_seq("NOTHING"): 1792 nothing = True 1793 else: 1794 self._match(TokenType.UPDATE) 1795 self._match(TokenType.SET) 1796 expressions = self._parse_csv(self._parse_equality) 1797 1798 return self.expression( 1799 exp.OnConflict, 1800 duplicate=duplicate, 1801 expressions=expressions, 1802 nothing=nothing, 1803 key=key, 1804 constraint=constraint, 1805 ) 1806 1807 def _parse_returning(self) -> t.Optional[exp.Returning]: 1808 if not self._match(TokenType.RETURNING): 1809 return None 1810 return self.expression( 1811 exp.Returning, 1812 expressions=self._parse_csv(self._parse_expression), 1813 into=self._match(TokenType.INTO) and self._parse_table_part(), 1814 ) 1815 1816 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1817 if not self._match(TokenType.FORMAT): 1818 return None 1819 return self._parse_row_format() 1820 1821 def _parse_row_format( 1822 self, match_row: bool = False 1823 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1824 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1825 return None 1826 1827 if self._match_text_seq("SERDE"): 1828 this = self._parse_string() 1829 1830 serde_properties = None 1831 if self._match(TokenType.SERDE_PROPERTIES): 1832 serde_properties = self.expression( 1833 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1834 ) 1835 1836 return self.expression( 1837 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1838 ) 1839 1840 self._match_text_seq("DELIMITED") 1841 1842 kwargs = {} 1843 1844 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1845 kwargs["fields"] = self._parse_string() 1846 if 
self._match_text_seq("ESCAPED", "BY"): 1847 kwargs["escaped"] = self._parse_string() 1848 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1849 kwargs["collection_items"] = self._parse_string() 1850 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1851 kwargs["map_keys"] = self._parse_string() 1852 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1853 kwargs["lines"] = self._parse_string() 1854 if self._match_text_seq("NULL", "DEFINED", "AS"): 1855 kwargs["null"] = self._parse_string() 1856 1857 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1858 1859 def _parse_load(self) -> exp.LoadData | exp.Command: 1860 if self._match_text_seq("DATA"): 1861 local = self._match_text_seq("LOCAL") 1862 self._match_text_seq("INPATH") 1863 inpath = self._parse_string() 1864 overwrite = self._match(TokenType.OVERWRITE) 1865 self._match_pair(TokenType.INTO, TokenType.TABLE) 1866 1867 return self.expression( 1868 exp.LoadData, 1869 this=self._parse_table(schema=True), 1870 local=local, 1871 overwrite=overwrite, 1872 inpath=inpath, 1873 partition=self._parse_partition(), 1874 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1875 serde=self._match_text_seq("SERDE") and self._parse_string(), 1876 ) 1877 return self._parse_as_command(self._prev) 1878 1879 def _parse_delete(self) -> exp.Delete: 1880 # This handles MySQL's "Multiple-Table Syntax" 1881 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1882 tables = None 1883 comments = self._prev_comments 1884 if not self._match(TokenType.FROM, advance=False): 1885 tables = self._parse_csv(self._parse_table) or None 1886 1887 returning = self._parse_returning() 1888 1889 return self.expression( 1890 exp.Delete, 1891 comments=comments, 1892 tables=tables, 1893 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1894 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1895 where=self._parse_where(), 1896 
returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        # UPDATE <table> SET <assignments> [FROM ...] [WHERE ...]
        # [RETURNING ...] [LIMIT ...]
        comments = self._prev_comments
        this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        # RETURNING may appear either before or after the FROM/WHERE tail,
        # hence the second `or self._parse_returning()` below.
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        # UNCACHE TABLE [IF EXISTS] <table>
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        # CACHE [LAZY] TABLE <table> [OPTIONS ('key' = 'value')] [AS <select>]
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        # PARTITION (<expr>, ...)
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        # A single row of a VALUES clause: a parenthesized, comma-separated
        # tuple of expressions.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
return self.expression(exp.Tuple, expressions=expressions) 1962 1963 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1964 # https://prestodb.io/docs/current/sql/values.html 1965 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1966 1967 def _parse_projections(self) -> t.List[exp.Expression]: 1968 return self._parse_expressions() 1969 1970 def _parse_select( 1971 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1972 ) -> t.Optional[exp.Expression]: 1973 cte = self._parse_with() 1974 if cte: 1975 this = self._parse_statement() 1976 1977 if not this: 1978 self.raise_error("Failed to parse any statement following CTE") 1979 return cte 1980 1981 if "with" in this.arg_types: 1982 this.set("with", cte) 1983 else: 1984 self.raise_error(f"{this.key} does not support CTE") 1985 this = cte 1986 elif self._match(TokenType.SELECT): 1987 comments = self._prev_comments 1988 1989 hint = self._parse_hint() 1990 all_ = self._match(TokenType.ALL) 1991 distinct = self._match_set(self.DISTINCT_TOKENS) 1992 1993 kind = ( 1994 self._match(TokenType.ALIAS) 1995 and self._match_texts(("STRUCT", "VALUE")) 1996 and self._prev.text 1997 ) 1998 1999 if distinct: 2000 distinct = self.expression( 2001 exp.Distinct, 2002 on=self._parse_value() if self._match(TokenType.ON) else None, 2003 ) 2004 2005 if all_ and distinct: 2006 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2007 2008 limit = self._parse_limit(top=True) 2009 projections = self._parse_projections() 2010 2011 this = self.expression( 2012 exp.Select, 2013 kind=kind, 2014 hint=hint, 2015 distinct=distinct, 2016 expressions=projections, 2017 limit=limit, 2018 ) 2019 this.comments = comments 2020 2021 into = self._parse_into() 2022 if into: 2023 this.set("into", into) 2024 2025 from_ = self._parse_from() 2026 if from_: 2027 this.set("from", from_) 2028 2029 this = self._parse_query_modifiers(this) 2030 elif (table or nested) and 
self._match(TokenType.L_PAREN): 2031 if self._match(TokenType.PIVOT): 2032 this = self._parse_simplified_pivot() 2033 elif self._match(TokenType.FROM): 2034 this = exp.select("*").from_( 2035 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2036 ) 2037 else: 2038 this = self._parse_table() if table else self._parse_select(nested=True) 2039 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2040 2041 self._match_r_paren() 2042 2043 # We return early here so that the UNION isn't attached to the subquery by the 2044 # following call to _parse_set_operations, but instead becomes the parent node 2045 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2046 elif self._match(TokenType.VALUES): 2047 this = self.expression( 2048 exp.Values, 2049 expressions=self._parse_csv(self._parse_value), 2050 alias=self._parse_table_alias(), 2051 ) 2052 else: 2053 this = None 2054 2055 return self._parse_set_operations(this) 2056 2057 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2058 if not skip_with_token and not self._match(TokenType.WITH): 2059 return None 2060 2061 comments = self._prev_comments 2062 recursive = self._match(TokenType.RECURSIVE) 2063 2064 expressions = [] 2065 while True: 2066 expressions.append(self._parse_cte()) 2067 2068 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2069 break 2070 else: 2071 self._match(TokenType.WITH) 2072 2073 return self.expression( 2074 exp.With, comments=comments, expressions=expressions, recursive=recursive 2075 ) 2076 2077 def _parse_cte(self) -> exp.CTE: 2078 alias = self._parse_table_alias() 2079 if not alias or not alias.this: 2080 self.raise_error("Expected CTE to have alias") 2081 2082 self._match(TokenType.ALIAS) 2083 return self.expression( 2084 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2085 ) 2086 2087 def _parse_table_alias( 2088 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2089 ) -> 
t.Optional[exp.TableAlias]: 2090 any_token = self._match(TokenType.ALIAS) 2091 alias = ( 2092 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2093 or self._parse_string_as_identifier() 2094 ) 2095 2096 index = self._index 2097 if self._match(TokenType.L_PAREN): 2098 columns = self._parse_csv(self._parse_function_parameter) 2099 self._match_r_paren() if columns else self._retreat(index) 2100 else: 2101 columns = None 2102 2103 if not alias and not columns: 2104 return None 2105 2106 return self.expression(exp.TableAlias, this=alias, columns=columns) 2107 2108 def _parse_subquery( 2109 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2110 ) -> t.Optional[exp.Subquery]: 2111 if not this: 2112 return None 2113 2114 return self.expression( 2115 exp.Subquery, 2116 this=this, 2117 pivots=self._parse_pivots(), 2118 alias=self._parse_table_alias() if parse_alias else None, 2119 ) 2120 2121 def _parse_query_modifiers( 2122 self, this: t.Optional[exp.Expression] 2123 ) -> t.Optional[exp.Expression]: 2124 if isinstance(this, self.MODIFIABLES): 2125 for join in iter(self._parse_join, None): 2126 this.append("joins", join) 2127 for lateral in iter(self._parse_lateral, None): 2128 this.append("laterals", lateral) 2129 2130 while True: 2131 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2132 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2133 key, expression = parser(self) 2134 2135 if expression: 2136 this.set(key, expression) 2137 if key == "limit": 2138 offset = expression.args.pop("offset", None) 2139 if offset: 2140 this.set("offset", exp.Offset(expression=offset)) 2141 continue 2142 break 2143 return this 2144 2145 def _parse_hint(self) -> t.Optional[exp.Hint]: 2146 if self._match(TokenType.HINT): 2147 hints = [] 2148 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2149 hints.extend(hint) 2150 2151 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2152 
self.raise_error("Expected */ after HINT") 2153 2154 return self.expression(exp.Hint, expressions=hints) 2155 2156 return None 2157 2158 def _parse_into(self) -> t.Optional[exp.Into]: 2159 if not self._match(TokenType.INTO): 2160 return None 2161 2162 temp = self._match(TokenType.TEMPORARY) 2163 unlogged = self._match_text_seq("UNLOGGED") 2164 self._match(TokenType.TABLE) 2165 2166 return self.expression( 2167 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2168 ) 2169 2170 def _parse_from( 2171 self, joins: bool = False, skip_from_token: bool = False 2172 ) -> t.Optional[exp.From]: 2173 if not skip_from_token and not self._match(TokenType.FROM): 2174 return None 2175 2176 return self.expression( 2177 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2178 ) 2179 2180 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2181 if not self._match(TokenType.MATCH_RECOGNIZE): 2182 return None 2183 2184 self._match_l_paren() 2185 2186 partition = self._parse_partition_by() 2187 order = self._parse_order() 2188 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2189 2190 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2191 rows = exp.var("ONE ROW PER MATCH") 2192 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2193 text = "ALL ROWS PER MATCH" 2194 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2195 text += f" SHOW EMPTY MATCHES" 2196 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2197 text += f" OMIT EMPTY MATCHES" 2198 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2199 text += f" WITH UNMATCHED ROWS" 2200 rows = exp.var(text) 2201 else: 2202 rows = None 2203 2204 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2205 text = "AFTER MATCH SKIP" 2206 if self._match_text_seq("PAST", "LAST", "ROW"): 2207 text += f" PAST LAST ROW" 2208 elif self._match_text_seq("TO", "NEXT", "ROW"): 2209 text += f" TO NEXT ROW" 2210 elif 
self._match_text_seq("TO", "FIRST"): 2211 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2212 elif self._match_text_seq("TO", "LAST"): 2213 text += f" TO LAST {self._advance_any().text}" # type: ignore 2214 after = exp.var(text) 2215 else: 2216 after = None 2217 2218 if self._match_text_seq("PATTERN"): 2219 self._match_l_paren() 2220 2221 if not self._curr: 2222 self.raise_error("Expecting )", self._curr) 2223 2224 paren = 1 2225 start = self._curr 2226 2227 while self._curr and paren > 0: 2228 if self._curr.token_type == TokenType.L_PAREN: 2229 paren += 1 2230 if self._curr.token_type == TokenType.R_PAREN: 2231 paren -= 1 2232 2233 end = self._prev 2234 self._advance() 2235 2236 if paren > 0: 2237 self.raise_error("Expecting )", self._curr) 2238 2239 pattern = exp.var(self._find_sql(start, end)) 2240 else: 2241 pattern = None 2242 2243 define = ( 2244 self._parse_csv( 2245 lambda: self.expression( 2246 exp.Alias, 2247 alias=self._parse_id_var(any_token=True), 2248 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2249 ) 2250 ) 2251 if self._match_text_seq("DEFINE") 2252 else None 2253 ) 2254 2255 self._match_r_paren() 2256 2257 return self.expression( 2258 exp.MatchRecognize, 2259 partition_by=partition, 2260 order=order, 2261 measures=measures, 2262 rows=rows, 2263 after=after, 2264 pattern=pattern, 2265 define=define, 2266 alias=self._parse_table_alias(), 2267 ) 2268 2269 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2270 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2271 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2272 2273 if outer_apply or cross_apply: 2274 this = self._parse_select(table=True) 2275 view = None 2276 outer = not cross_apply 2277 elif self._match(TokenType.LATERAL): 2278 this = self._parse_select(table=True) 2279 view = self._match(TokenType.VIEW) 2280 outer = self._match(TokenType.OUTER) 2281 else: 2282 return None 2283 2284 if not this: 2285 this = ( 2286 
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        # Returns the (method, side, kind) tokens of a join prefix, e.g.
        # NATURAL LEFT OUTER; each element is falsy when that part is absent.
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join clause; returns None when the stream is not at a join."""
        # A bare comma between tables is an implicit (cross-style) join.
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join: rewind and discard any matched parts.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT-sided join downstream.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            joins = self._parse_joins()

            # Handle nested joins: if further joins followed by ON/USING were
            # parsed, attach them to the right-hand table; otherwise rewind.
            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        # Preserve comments attached to the join-part tokens.
        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
table=table, 2412 using=using, 2413 columns=columns, 2414 unique=unique, 2415 primary=primary, 2416 amp=amp, 2417 partition_by=self._parse_partition_by(), 2418 ) 2419 2420 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2421 hints: t.List[exp.Expression] = [] 2422 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2423 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2424 hints.append( 2425 self.expression( 2426 exp.WithTableHint, 2427 expressions=self._parse_csv( 2428 lambda: self._parse_function() or self._parse_var(any_token=True) 2429 ), 2430 ) 2431 ) 2432 self._match_r_paren() 2433 else: 2434 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2435 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2436 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2437 2438 self._match_texts({"INDEX", "KEY"}) 2439 if self._match(TokenType.FOR): 2440 hint.set("target", self._advance_any() and self._prev.text.upper()) 2441 2442 hint.set("expressions", self._parse_wrapped_id_vars()) 2443 hints.append(hint) 2444 2445 return hints or None 2446 2447 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2448 return ( 2449 (not schema and self._parse_function(optional_parens=False)) 2450 or self._parse_id_var(any_token=False) 2451 or self._parse_string_as_identifier() 2452 or self._parse_placeholder() 2453 ) 2454 2455 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2456 catalog = None 2457 db = None 2458 table = self._parse_table_part(schema=schema) 2459 2460 while self._match(TokenType.DOT): 2461 if catalog: 2462 # This allows nesting the table in arbitrarily many dot expressions if needed 2463 table = self.expression( 2464 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2465 ) 2466 else: 2467 catalog = db 2468 db = table 2469 table = self._parse_table_part(schema=schema) 2470 2471 if not table: 2472 
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse anything that can appear in a FROM-like position.

        Tries, in order: LATERAL/APPLY, UNNEST, a derived VALUES table, a
        subquery, and finally a (possibly dotted) table name; then attaches
        alias, pivots, hints, TABLESAMPLE and, optionally, trailing joins.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            # In a schema context the table is a name plus column definitions.
            return self._parse_schema(this=this)

        # NOTE(review): ALIAS_POST_TABLESAMPLE flips whether the sample clause
        # is parsed before or after the alias/pivots/hints — presumably a
        # dialect difference; confirm against the dialect definitions.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The TableSample node wraps the table it samples from.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this
2540 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2541 2542 alias = self._parse_table_alias() if with_alias else None 2543 2544 if alias and self.UNNEST_COLUMN_ONLY: 2545 if alias.args.get("columns"): 2546 self.raise_error("Unexpected extra column alias in unnest.") 2547 2548 alias.set("columns", [alias.this]) 2549 alias.set("this", None) 2550 2551 offset = None 2552 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2553 self._match(TokenType.ALIAS) 2554 offset = self._parse_id_var() or exp.to_identifier("offset") 2555 2556 return self.expression( 2557 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2558 ) 2559 2560 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2561 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2562 if not is_derived and not self._match(TokenType.VALUES): 2563 return None 2564 2565 expressions = self._parse_csv(self._parse_value) 2566 alias = self._parse_table_alias() 2567 2568 if is_derived: 2569 self._match_r_paren() 2570 2571 return self.expression( 2572 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2573 ) 2574 2575 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2576 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2577 as_modifier and self._match_text_seq("USING", "SAMPLE") 2578 ): 2579 return None 2580 2581 bucket_numerator = None 2582 bucket_denominator = None 2583 bucket_field = None 2584 percent = None 2585 rows = None 2586 size = None 2587 seed = None 2588 2589 kind = ( 2590 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2591 ) 2592 method = self._parse_var(tokens=(TokenType.ROW,)) 2593 2594 self._match(TokenType.L_PAREN) 2595 2596 num = self._parse_number() 2597 2598 if self._match_text_seq("BUCKET"): 2599 bucket_numerator = self._parse_number() 2600 self._match_text_seq("OUT", "OF") 2601 bucket_denominator = 
bucket_denominator = self._parse_number() 2602 self._match(TokenType.ON) 2603 bucket_field = self._parse_field() 2604 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2605 percent = num 2606 elif self._match(TokenType.ROWS): 2607 rows = num 2608 else: 2609 size = num 2610 2611 self._match(TokenType.R_PAREN) 2612 2613 if self._match(TokenType.L_PAREN): 2614 method = self._parse_var() 2615 seed = self._match(TokenType.COMMA) and self._parse_number() 2616 self._match_r_paren() 2617 elif self._match_texts(("SEED", "REPEATABLE")): 2618 seed = self._parse_wrapped(self._parse_number) 2619 2620 return self.expression( 2621 exp.TableSample, 2622 method=method, 2623 bucket_numerator=bucket_numerator, 2624 bucket_denominator=bucket_denominator, 2625 bucket_field=bucket_field, 2626 percent=percent, 2627 rows=rows, 2628 size=size, 2629 seed=seed, 2630 kind=kind, 2631 ) 2632 2633 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2634 return list(iter(self._parse_pivot, None)) or None 2635 2636 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2637 return list(iter(self._parse_join, None)) or None 2638 2639 # https://duckdb.org/docs/sql/statements/pivot 2640 def _parse_simplified_pivot(self) -> exp.Pivot: 2641 def _parse_on() -> t.Optional[exp.Expression]: 2642 this = self._parse_bitwise() 2643 return self._parse_in(this) if self._match(TokenType.IN) else this 2644 2645 this = self._parse_table() 2646 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2647 using = self._match(TokenType.USING) and self._parse_csv( 2648 lambda: self._parse_alias(self._parse_function()) 2649 ) 2650 group = self._parse_group() 2651 return self.expression( 2652 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2653 ) 2654 2655 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2656 index = self._index 2657 include_nulls = None 2658 2659 if self._match(TokenType.PIVOT): 2660 unpivot = False 2661 elif self._match(TokenType.UNPIVOT): 2662 
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT clause; returns None when not at one."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT keyword without parens: rewind, it's something else.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # Only the last pivot in a chain may carry a table alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Synthesize output column names by combining each IN-list value
            # with the aggregation names; order depends on PREFIXED_PIVOT_COLUMNS.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; returns None when no WHERE token is present."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including GROUPING SETS / ROLLUP / CUBE / WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE store True; bare ROLLUP(...)/CUBE(...)
            # store their parenthesized column lists.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Keep looping only while grouping constructs are still consumed.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        # GROUPING SETS ( (a, b), c, ... )
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)
self._match_r_paren() 2789 return self.expression(exp.Tuple, expressions=grouping_set) 2790 2791 return self._parse_column() 2792 2793 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2794 if not skip_having_token and not self._match(TokenType.HAVING): 2795 return None 2796 return self.expression(exp.Having, this=self._parse_conjunction()) 2797 2798 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2799 if not self._match(TokenType.QUALIFY): 2800 return None 2801 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2802 2803 def _parse_order( 2804 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2805 ) -> t.Optional[exp.Expression]: 2806 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2807 return this 2808 2809 return self.expression( 2810 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2811 ) 2812 2813 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2814 if not self._match(token): 2815 return None 2816 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2817 2818 def _parse_ordered(self) -> exp.Ordered: 2819 this = self._parse_conjunction() 2820 self._match(TokenType.ASC) 2821 2822 is_desc = self._match(TokenType.DESC) 2823 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2824 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2825 desc = is_desc or False 2826 asc = not desc 2827 nulls_first = is_nulls_first or False 2828 explicitly_null_ordered = is_nulls_first or is_nulls_last 2829 2830 if ( 2831 not explicitly_null_ordered 2832 and ( 2833 (asc and self.NULL_ORDERING == "nulls_are_small") 2834 or (desc and self.NULL_ORDERING != "nulls_are_small") 2835 ) 2836 and self.NULL_ORDERING != "nulls_are_last" 2837 ): 2838 nulls_first = True 2839 2840 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2841 2842 def _parse_limit( 2843 self, this: 
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT [offset,] count, TOP [(]n[)], or ANSI FETCH FIRST/NEXT.

        Args:
            this: the expression the clause attaches to.
            top: when True, parse T-SQL style TOP instead of LIMIT.

        Returns the Limit/Fetch expression, or `this` unchanged when absent.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP n or TOP (n)
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL `LIMIT offset, count`: the first number was the offset.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS]; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)
2910 else: 2911 break 2912 2913 expressions = None 2914 if self._match_text_seq("OF"): 2915 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2916 2917 wait: t.Optional[bool | exp.Expression] = None 2918 if self._match_text_seq("NOWAIT"): 2919 wait = True 2920 elif self._match_text_seq("WAIT"): 2921 wait = self._parse_primary() 2922 elif self._match_text_seq("SKIP", "LOCKED"): 2923 wait = False 2924 2925 locks.append( 2926 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2927 ) 2928 2929 return locks 2930 2931 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2932 if not self._match_set(self.SET_OPERATIONS): 2933 return this 2934 2935 token_type = self._prev.token_type 2936 2937 if token_type == TokenType.UNION: 2938 expression = exp.Union 2939 elif token_type == TokenType.EXCEPT: 2940 expression = exp.Except 2941 else: 2942 expression = exp.Intersect 2943 2944 return self.expression( 2945 expression, 2946 this=this, 2947 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2948 expression=self._parse_set_operations(self._parse_select(nested=True)), 2949 ) 2950 2951 def _parse_expression(self) -> t.Optional[exp.Expression]: 2952 return self._parse_alias(self._parse_conjunction()) 2953 2954 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2955 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2956 2957 def _parse_equality(self) -> t.Optional[exp.Expression]: 2958 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2959 2960 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2961 return self._parse_tokens(self._parse_range, self.COMPARISON) 2962 2963 def _parse_range(self) -> t.Optional[exp.Expression]: 2964 this = self._parse_bitwise() 2965 negate = self._match(TokenType.NOT) 2966 2967 if self._match_set(self.RANGE_PARSERS): 2968 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2969 if 
not expression: 2970 return this 2971 2972 this = expression 2973 elif self._match(TokenType.ISNULL): 2974 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2975 2976 # Postgres supports ISNULL and NOTNULL for conditions. 2977 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2978 if self._match(TokenType.NOTNULL): 2979 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2980 this = self.expression(exp.Not, this=this) 2981 2982 if negate: 2983 this = self.expression(exp.Not, this=this) 2984 2985 if self._match(TokenType.IS): 2986 this = self._parse_is(this) 2987 2988 return this 2989 2990 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2991 index = self._index - 1 2992 negate = self._match(TokenType.NOT) 2993 2994 if self._match_text_seq("DISTINCT", "FROM"): 2995 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2996 return self.expression(klass, this=this, expression=self._parse_expression()) 2997 2998 expression = self._parse_null() or self._parse_boolean() 2999 if not expression: 3000 self._retreat(index) 3001 return None 3002 3003 this = self.expression(exp.Is, this=this, expression=expression) 3004 return self.expression(exp.Not, this=this) if negate else this 3005 3006 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3007 unnest = self._parse_unnest(with_alias=False) 3008 if unnest: 3009 this = self.expression(exp.In, this=this, unnest=unnest) 3010 elif self._match(TokenType.L_PAREN): 3011 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3012 3013 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3014 this = self.expression(exp.In, this=this, query=expressions[0]) 3015 else: 3016 this = self.expression(exp.In, this=this, expressions=expressions) 3017 3018 self._match_r_paren(this) 3019 else: 3020 this = self.expression(exp.In, this=this, field=self._parse_field()) 3021 3022 return 
this 3023 3024 def _parse_between(self, this: exp.Expression) -> exp.Between: 3025 low = self._parse_bitwise() 3026 self._match(TokenType.AND) 3027 high = self._parse_bitwise() 3028 return self.expression(exp.Between, this=this, low=low, high=high) 3029 3030 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3031 if not self._match(TokenType.ESCAPE): 3032 return this 3033 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3034 3035 def _parse_interval(self) -> t.Optional[exp.Interval]: 3036 index = self._index 3037 3038 if not self._match(TokenType.INTERVAL): 3039 return None 3040 3041 if self._match(TokenType.STRING, advance=False): 3042 this = self._parse_primary() 3043 else: 3044 this = self._parse_term() 3045 3046 if not this: 3047 self._retreat(index) 3048 return None 3049 3050 unit = self._parse_function() or self._parse_var() 3051 3052 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3053 # each INTERVAL expression into this canonical form so it's easy to transpile 3054 if this and this.is_number: 3055 this = exp.Literal.string(this.name) 3056 elif this and this.is_string: 3057 parts = this.name.split() 3058 3059 if len(parts) == 2: 3060 if unit: 3061 # this is not actually a unit, it's something else 3062 unit = None 3063 self._retreat(self._index - 1) 3064 else: 3065 this = exp.Literal.string(parts[0]) 3066 unit = self.expression(exp.Var, this=parts[1]) 3067 3068 return self.expression(exp.Interval, this=this, unit=unit) 3069 3070 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3071 this = self._parse_term() 3072 3073 while True: 3074 if self._match_set(self.BITWISE): 3075 this = self.expression( 3076 self.BITWISE[self._prev.token_type], 3077 this=this, 3078 expression=self._parse_term(), 3079 ) 3080 elif self._match(TokenType.DQMARK): 3081 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3082 elif 
self._match_pair(TokenType.LT, TokenType.LT): 3083 this = self.expression( 3084 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3085 ) 3086 elif self._match_pair(TokenType.GT, TokenType.GT): 3087 this = self.expression( 3088 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3089 ) 3090 else: 3091 break 3092 3093 return this 3094 3095 def _parse_term(self) -> t.Optional[exp.Expression]: 3096 return self._parse_tokens(self._parse_factor, self.TERM) 3097 3098 def _parse_factor(self) -> t.Optional[exp.Expression]: 3099 return self._parse_tokens(self._parse_unary, self.FACTOR) 3100 3101 def _parse_unary(self) -> t.Optional[exp.Expression]: 3102 if self._match_set(self.UNARY_PARSERS): 3103 return self.UNARY_PARSERS[self._prev.token_type](self) 3104 return self._parse_at_time_zone(self._parse_type()) 3105 3106 def _parse_type(self) -> t.Optional[exp.Expression]: 3107 interval = self._parse_interval() 3108 if interval: 3109 return interval 3110 3111 index = self._index 3112 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3113 this = self._parse_column() 3114 3115 if data_type: 3116 if isinstance(this, exp.Literal): 3117 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3118 if parser: 3119 return parser(self, this, data_type) 3120 return self.expression(exp.Cast, this=this, to=data_type) 3121 if not data_type.expressions: 3122 self._retreat(index) 3123 return self._parse_column() 3124 return self._parse_column_ops(data_type) 3125 3126 return this 3127 3128 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3129 this = self._parse_type() 3130 if not this: 3131 return None 3132 3133 return self.expression( 3134 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3135 ) 3136 3137 def _parse_types( 3138 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3139 ) -> t.Optional[exp.Expression]: 3140 index = self._index 3141 3142 prefix = 
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type, e.g. ARRAY<INT>, DECIMAL(10, 2).

        Args:
            check_func: bail out (return None) when `TYPE(...)` is likely a function call.
            schema: propagate schema context into nested type parses.
            allow_identifiers: allow a plain identifier to be re-tokenized as a type.

        Returns:
            The type expression, or None (after rewinding) when no type parses here.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier's text: it may itself be a type name.
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    return identifier
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        # Parenthesized parameters: STRUCT members, nested types, enum values,
        # or plain size parameters, depending on the type token.
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(...) might actually be a function call — resolved further below.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax, e.g. ARRAY<INT>, STRUCT<a: INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            if self._match_text_seq("YEAR", "TO", "MONTH"):
                span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()]
            elif self._match_text_seq("DAY", "TO", "SECOND"):
                span = [exp.IntervalDayToSecondSpan()]
            else:
                span = None

            unit = not span and self._parse_var()
            if not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            # Only treat TYPE(...) as a type when a string literal follows
            # (a type literal); otherwise it was a function call — rewind fully.
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE when the modifier is present, else pass through."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, including any trailing column operators."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (::casts, dots, subscripts) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast syntax
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Operator with a registered handler: consume the next token as
                # a number or string literal operand.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Dotted access: what was parsed as a column is actually the
                # table/db/catalog qualifier chain for `field` — shift the parts.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, or a parenthesized expression/subquery/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated: 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # `.5` style numeric literal
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def 
_parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        # A field is a primary expression, a function call, or an identifier.
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """
        Parse a function call.

        Args:
            functions: name -> builder overrides (defaults to self.FUNCTIONS).
            anonymous: when True, build an exp.Anonymous node instead of using
                a registered builder.
            optional_parens: when True, allow paren-less functions such as
                those in NO_PAREN_FUNCTIONS (e.g. CURRENT_DATE).
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Remember the original (un-normalized) spelling of the name.
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter in a function definition's signature."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse an introducer (token immediately followed by a literal), else an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda argument (e.g. `x -> expr`), else a regular function argument."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda — rewind and parse as a normal (possibly DISTINCT) argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into an exp.Schema on `this`."""
        index = self._index

        if not self.errors:
            # Speculatively try a nested SELECT first; any errors from the
            # attempt are discarded and the position is restored.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Untyped `col AS <expr>`: a computed column.
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT values."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY(...) and its options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>): the parens hold an expression.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the tail of a NOT ... constraint (NULL or CASESPECIFIC)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally CONSTRAINT-named) column constraint, if present."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a schema constraint; unnamed ones are limited to SCHEMA_UNNAMED_CONSTRAINTS."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the registered constraint parser for the matched keyword."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key constraint option strings, e.g. "ON DELETE CASCADE", "DEFERRABLE"."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) REFERENCES ... with ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as a column constraint or a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] / {...} following `this`: subscripts, slices, arrays, or struct literals."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading colon: a slice with no start, e.g. x[:2].
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscript indices relative to the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Extend `this` into a slice when followed by a colon."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(cond, true[, false]) or IF cond THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <column> [OVER (ORDER BY ...)]."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING MAX|MIN expr])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def 
_parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT ...]); TryCast when not strict."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form: cast to a type given as a string.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # An identifier here is a user-defined type name.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST(... AS DATE/TIMESTAMP FORMAT '...') is rewritten into a
                # STR_TO_DATE / STR_TO_TIME call with a translated format.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT arguments, normalizing NULL handling per dialect settings."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Coerce each argument so NULLs concatenate as empty strings.
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style calls, including WITHIN GROUP ordering."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search values must also match when both sides are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a [KEY] key [: | VALUE] value pair, as used by JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) with its NULL-handling / UNIQUE KEYS / RETURNING options."""
        star = self._parse_star()
        expressions = [star] if star else self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif 
self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        # Single-argument LOG: some dialects treat it as natural log.
        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH (cols) AGAINST (expr [modifier]) full-text search syntax."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse OPENJSON(expr [, path]) [WITH (column definitions)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause column: name, type, optional path, optional AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(needle IN haystack) or a locate-style argument list."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): the first expression was the trim characters.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause's list of named window definitions, if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IGNORE NULLS / RESPECT NULLS when either modifier is present."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
4234 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4235 if self._match_text_seq("WITHIN", "GROUP"): 4236 order = self._parse_wrapped(self._parse_order) 4237 this = self.expression(exp.WithinGroup, this=this, expression=order) 4238 4239 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4240 # Some dialects choose to implement and some do not. 4241 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4242 4243 # There is some code above in _parse_lambda that handles 4244 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4245 4246 # The below changes handle 4247 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4248 4249 # Oracle allows both formats 4250 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4251 # and Snowflake chose to do the same for familiarity 4252 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4253 this = self._parse_respect_or_ignore_nulls(this) 4254 4255 # bigquery select from window x AS (partition by ...) 
4256 if alias: 4257 over = None 4258 self._match(TokenType.ALIAS) 4259 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4260 return this 4261 else: 4262 over = self._prev.text.upper() 4263 4264 if not self._match(TokenType.L_PAREN): 4265 return self.expression( 4266 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4267 ) 4268 4269 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4270 4271 first = self._match(TokenType.FIRST) 4272 if self._match_text_seq("LAST"): 4273 first = False 4274 4275 partition, order = self._parse_partition_and_order() 4276 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4277 4278 if kind: 4279 self._match(TokenType.BETWEEN) 4280 start = self._parse_window_spec() 4281 self._match(TokenType.AND) 4282 end = self._parse_window_spec() 4283 4284 spec = self.expression( 4285 exp.WindowSpec, 4286 kind=kind, 4287 start=start["value"], 4288 start_side=start["side"], 4289 end=end["value"], 4290 end_side=end["side"], 4291 ) 4292 else: 4293 spec = None 4294 4295 self._match_r_paren() 4296 4297 window = self.expression( 4298 exp.Window, 4299 this=this, 4300 partition_by=partition, 4301 order=order, 4302 spec=spec, 4303 alias=window_alias, 4304 over=over, 4305 first=first, 4306 ) 4307 4308 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4309 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4310 return self._parse_window(window, alias=alias) 4311 4312 return window 4313 4314 def _parse_partition_and_order( 4315 self, 4316 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4317 return self._parse_partition_by(), self._parse_order() 4318 4319 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4320 self._match(TokenType.BETWEEN) 4321 4322 return { 4323 "value": ( 4324 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4325 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4326 or self._parse_bitwise() 4327 ), 4328 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4329 } 4330 4331 def _parse_alias( 4332 self, this: t.Optional[exp.Expression], explicit: bool = False 4333 ) -> t.Optional[exp.Expression]: 4334 any_token = self._match(TokenType.ALIAS) 4335 4336 if explicit and not any_token: 4337 return this 4338 4339 if self._match(TokenType.L_PAREN): 4340 aliases = self.expression( 4341 exp.Aliases, 4342 this=this, 4343 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4344 ) 4345 self._match_r_paren(aliases) 4346 return aliases 4347 4348 alias = self._parse_id_var(any_token) 4349 4350 if alias: 4351 return self.expression(exp.Alias, this=this, alias=alias) 4352 4353 return this 4354 4355 def _parse_id_var( 4356 self, 4357 any_token: bool = True, 4358 tokens: t.Optional[t.Collection[TokenType]] = None, 4359 ) -> t.Optional[exp.Expression]: 4360 identifier = self._parse_identifier() 4361 4362 if identifier: 4363 return identifier 4364 4365 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4366 quoted = self._prev.token_type == TokenType.STRING 4367 return exp.Identifier(this=self._prev.text, quoted=quoted) 4368 4369 return None 4370 4371 def _parse_string(self) -> t.Optional[exp.Expression]: 4372 if self._match(TokenType.STRING): 4373 return 
self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4374 return self._parse_placeholder() 4375 4376 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4377 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4378 4379 def _parse_number(self) -> t.Optional[exp.Expression]: 4380 if self._match(TokenType.NUMBER): 4381 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4382 return self._parse_placeholder() 4383 4384 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4385 if self._match(TokenType.IDENTIFIER): 4386 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4387 return self._parse_placeholder() 4388 4389 def _parse_var( 4390 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4391 ) -> t.Optional[exp.Expression]: 4392 if ( 4393 (any_token and self._advance_any()) 4394 or self._match(TokenType.VAR) 4395 or (self._match_set(tokens) if tokens else False) 4396 ): 4397 return self.expression(exp.Var, this=self._prev.text) 4398 return self._parse_placeholder() 4399 4400 def _advance_any(self) -> t.Optional[Token]: 4401 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4402 self._advance() 4403 return self._prev 4404 return None 4405 4406 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4407 return self._parse_var() or self._parse_string() 4408 4409 def _parse_null(self) -> t.Optional[exp.Expression]: 4410 if self._match(TokenType.NULL): 4411 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4412 return self._parse_placeholder() 4413 4414 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4415 if self._match(TokenType.TRUE): 4416 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4417 if self._match(TokenType.FALSE): 4418 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4419 return self._parse_placeholder() 4420 4421 def _parse_star(self) -> 
t.Optional[exp.Expression]: 4422 if self._match(TokenType.STAR): 4423 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4424 return self._parse_placeholder() 4425 4426 def _parse_parameter(self) -> exp.Parameter: 4427 wrapped = self._match(TokenType.L_BRACE) 4428 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4429 self._match(TokenType.R_BRACE) 4430 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4431 4432 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4433 if self._match_set(self.PLACEHOLDER_PARSERS): 4434 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4435 if placeholder: 4436 return placeholder 4437 self._advance(-1) 4438 return None 4439 4440 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4441 if not self._match(TokenType.EXCEPT): 4442 return None 4443 if self._match(TokenType.L_PAREN, advance=False): 4444 return self._parse_wrapped_csv(self._parse_column) 4445 return self._parse_csv(self._parse_column) 4446 4447 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4448 if not self._match(TokenType.REPLACE): 4449 return None 4450 if self._match(TokenType.L_PAREN, advance=False): 4451 return self._parse_wrapped_csv(self._parse_expression) 4452 return self._parse_expressions() 4453 4454 def _parse_csv( 4455 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4456 ) -> t.List[exp.Expression]: 4457 parse_result = parse_method() 4458 items = [parse_result] if parse_result is not None else [] 4459 4460 while self._match(sep): 4461 self._add_comments(parse_result) 4462 parse_result = parse_method() 4463 if parse_result is not None: 4464 items.append(parse_result) 4465 4466 return items 4467 4468 def _parse_tokens( 4469 self, parse_method: t.Callable, expressions: t.Dict 4470 ) -> t.Optional[exp.Expression]: 4471 this = parse_method() 4472 4473 while self._match_set(expressions): 4474 this = self.expression( 4475 
expressions[self._prev.token_type], 4476 this=this, 4477 comments=self._prev_comments, 4478 expression=parse_method(), 4479 ) 4480 4481 return this 4482 4483 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4484 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4485 4486 def _parse_wrapped_csv( 4487 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4488 ) -> t.List[exp.Expression]: 4489 return self._parse_wrapped( 4490 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4491 ) 4492 4493 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4494 wrapped = self._match(TokenType.L_PAREN) 4495 if not wrapped and not optional: 4496 self.raise_error("Expecting (") 4497 parse_result = parse_method() 4498 if wrapped: 4499 self._match_r_paren() 4500 return parse_result 4501 4502 def _parse_expressions(self) -> t.List[exp.Expression]: 4503 return self._parse_csv(self._parse_expression) 4504 4505 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4506 return self._parse_select() or self._parse_set_operations( 4507 self._parse_expression() if alias else self._parse_conjunction() 4508 ) 4509 4510 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4511 return self._parse_query_modifiers( 4512 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4513 ) 4514 4515 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4516 this = None 4517 if self._match_texts(self.TRANSACTION_KIND): 4518 this = self._prev.text 4519 4520 self._match_texts({"TRANSACTION", "WORK"}) 4521 4522 modes = [] 4523 while True: 4524 mode = [] 4525 while self._match(TokenType.VAR): 4526 mode.append(self._prev.text) 4527 4528 if mode: 4529 modes.append(" ".join(mode)) 4530 if not self._match(TokenType.COMMA): 4531 break 4532 4533 return self.expression(exp.Transaction, this=this, 
modes=modes) 4534 4535 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4536 chain = None 4537 savepoint = None 4538 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4539 4540 self._match_texts({"TRANSACTION", "WORK"}) 4541 4542 if self._match_text_seq("TO"): 4543 self._match_text_seq("SAVEPOINT") 4544 savepoint = self._parse_id_var() 4545 4546 if self._match(TokenType.AND): 4547 chain = not self._match_text_seq("NO") 4548 self._match_text_seq("CHAIN") 4549 4550 if is_rollback: 4551 return self.expression(exp.Rollback, savepoint=savepoint) 4552 4553 return self.expression(exp.Commit, chain=chain) 4554 4555 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4556 if not self._match_text_seq("ADD"): 4557 return None 4558 4559 self._match(TokenType.COLUMN) 4560 exists_column = self._parse_exists(not_=True) 4561 expression = self._parse_field_def() 4562 4563 if expression: 4564 expression.set("exists", exists_column) 4565 4566 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4567 if self._match_texts(("FIRST", "AFTER")): 4568 position = self._prev.text 4569 column_position = self.expression( 4570 exp.ColumnPosition, this=self._parse_column(), position=position 4571 ) 4572 expression.set("position", column_position) 4573 4574 return expression 4575 4576 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4577 drop = self._match(TokenType.DROP) and self._parse_drop() 4578 if drop and not isinstance(drop, exp.Command): 4579 drop.set("kind", drop.args.get("kind", "COLUMN")) 4580 return drop 4581 4582 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4583 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4584 return self.expression( 4585 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4586 ) 4587 4588 def _parse_add_constraint(self) -> exp.AddConstraint: 4589 this = None 4590 
kind = self._prev.token_type 4591 4592 if kind == TokenType.CONSTRAINT: 4593 this = self._parse_id_var() 4594 4595 if self._match_text_seq("CHECK"): 4596 expression = self._parse_wrapped(self._parse_conjunction) 4597 enforced = self._match_text_seq("ENFORCED") 4598 4599 return self.expression( 4600 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4601 ) 4602 4603 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4604 expression = self._parse_foreign_key() 4605 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4606 expression = self._parse_primary_key() 4607 else: 4608 expression = None 4609 4610 return self.expression(exp.AddConstraint, this=this, expression=expression) 4611 4612 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4613 index = self._index - 1 4614 4615 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4616 return self._parse_csv(self._parse_add_constraint) 4617 4618 self._retreat(index) 4619 return self._parse_csv(self._parse_add_column) 4620 4621 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4622 self._match(TokenType.COLUMN) 4623 column = self._parse_field(any_token=True) 4624 4625 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4626 return self.expression(exp.AlterColumn, this=column, drop=True) 4627 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4628 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4629 4630 self._match_text_seq("SET", "DATA") 4631 return self.expression( 4632 exp.AlterColumn, 4633 this=column, 4634 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4635 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4636 using=self._match(TokenType.USING) and self._parse_conjunction(), 4637 ) 4638 4639 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4640 index = self._index - 1 4641 4642 partition_exists = self._parse_exists() 4643 if self._match(TokenType.PARTITION, 
advance=False): 4644 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4645 4646 self._retreat(index) 4647 return self._parse_csv(self._parse_drop_column) 4648 4649 def _parse_alter_table_rename(self) -> exp.RenameTable: 4650 self._match_text_seq("TO") 4651 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4652 4653 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4654 start = self._prev 4655 4656 if not self._match(TokenType.TABLE): 4657 return self._parse_as_command(start) 4658 4659 exists = self._parse_exists() 4660 this = self._parse_table(schema=True) 4661 4662 if self._next: 4663 self._advance() 4664 4665 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4666 if parser: 4667 actions = ensure_list(parser(self)) 4668 4669 if not self._curr: 4670 return self.expression( 4671 exp.AlterTable, 4672 this=this, 4673 exists=exists, 4674 actions=actions, 4675 ) 4676 return self._parse_as_command(start) 4677 4678 def _parse_merge(self) -> exp.Merge: 4679 self._match(TokenType.INTO) 4680 target = self._parse_table() 4681 4682 if target and self._match(TokenType.ALIAS, advance=False): 4683 target.set("alias", self._parse_table_alias()) 4684 4685 self._match(TokenType.USING) 4686 using = self._parse_table() 4687 4688 self._match(TokenType.ON) 4689 on = self._parse_conjunction() 4690 4691 whens = [] 4692 while self._match(TokenType.WHEN): 4693 matched = not self._match(TokenType.NOT) 4694 self._match_text_seq("MATCHED") 4695 source = ( 4696 False 4697 if self._match_text_seq("BY", "TARGET") 4698 else self._match_text_seq("BY", "SOURCE") 4699 ) 4700 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4701 4702 self._match(TokenType.THEN) 4703 4704 if self._match(TokenType.INSERT): 4705 _this = self._parse_star() 4706 if _this: 4707 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4708 else: 4709 then = self.expression( 4710 
exp.Insert, 4711 this=self._parse_value(), 4712 expression=self._match(TokenType.VALUES) and self._parse_value(), 4713 ) 4714 elif self._match(TokenType.UPDATE): 4715 expressions = self._parse_star() 4716 if expressions: 4717 then = self.expression(exp.Update, expressions=expressions) 4718 else: 4719 then = self.expression( 4720 exp.Update, 4721 expressions=self._match(TokenType.SET) 4722 and self._parse_csv(self._parse_equality), 4723 ) 4724 elif self._match(TokenType.DELETE): 4725 then = self.expression(exp.Var, this=self._prev.text) 4726 else: 4727 then = None 4728 4729 whens.append( 4730 self.expression( 4731 exp.When, 4732 matched=matched, 4733 source=source, 4734 condition=condition, 4735 then=then, 4736 ) 4737 ) 4738 4739 return self.expression( 4740 exp.Merge, 4741 this=target, 4742 using=using, 4743 on=on, 4744 expressions=whens, 4745 ) 4746 4747 def _parse_show(self) -> t.Optional[exp.Expression]: 4748 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4749 if parser: 4750 return parser(self) 4751 self._advance() 4752 return self.expression(exp.Show, this=self._prev.text.upper()) 4753 4754 def _parse_set_item_assignment( 4755 self, kind: t.Optional[str] = None 4756 ) -> t.Optional[exp.Expression]: 4757 index = self._index 4758 4759 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4760 return self._parse_set_transaction(global_=kind == "GLOBAL") 4761 4762 left = self._parse_primary() or self._parse_id_var() 4763 4764 if not self._match_texts(("=", "TO")): 4765 self._retreat(index) 4766 return None 4767 4768 right = self._parse_statement() or self._parse_id_var() 4769 this = self.expression(exp.EQ, this=left, expression=right) 4770 4771 return self.expression(exp.SetItem, this=this, kind=kind) 4772 4773 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4774 self._match_text_seq("TRANSACTION") 4775 characteristics = self._parse_csv( 4776 lambda: 
self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4777 ) 4778 return self.expression( 4779 exp.SetItem, 4780 expressions=characteristics, 4781 kind="TRANSACTION", 4782 **{"global": global_}, # type: ignore 4783 ) 4784 4785 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4786 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4787 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4788 4789 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4790 index = self._index 4791 set_ = self.expression( 4792 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4793 ) 4794 4795 if self._curr: 4796 self._retreat(index) 4797 return self._parse_as_command(self._prev) 4798 4799 return set_ 4800 4801 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4802 for option in options: 4803 if self._match_text_seq(*option.split(" ")): 4804 return exp.var(option) 4805 return None 4806 4807 def _parse_as_command(self, start: Token) -> exp.Command: 4808 while self._curr: 4809 self._advance() 4810 text = self._find_sql(start, self._prev) 4811 size = len(start.text) 4812 return exp.Command(this=text[:size], expression=text[size:]) 4813 4814 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4815 settings = [] 4816 4817 self._match_l_paren() 4818 kind = self._parse_id_var() 4819 4820 if self._match(TokenType.L_PAREN): 4821 while True: 4822 key = self._parse_id_var() 4823 value = self._parse_primary() 4824 4825 if not key and value is None: 4826 break 4827 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4828 self._match(TokenType.R_PAREN) 4829 4830 self._match_r_paren() 4831 4832 return self.expression( 4833 exp.DictProperty, 4834 this=this, 4835 kind=kind.this if kind else None, 4836 settings=settings, 4837 ) 4838 4839 def _parse_dict_range(self, this: str) -> exp.DictRange: 4840 self._match_l_paren() 
4841 has_min = self._match_text_seq("MIN") 4842 if has_min: 4843 min = self._parse_var() or self._parse_primary() 4844 self._match_text_seq("MAX") 4845 max = self._parse_var() or self._parse_primary() 4846 else: 4847 max = self._parse_var() or self._parse_primary() 4848 min = exp.Literal.number(0) 4849 self._match_r_paren() 4850 return self.expression(exp.DictRange, this=this, min=min, max=max) 4851 4852 def _find_parser( 4853 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4854 ) -> t.Optional[t.Callable]: 4855 if not self._curr: 4856 return None 4857 4858 index = self._index 4859 this = [] 4860 while True: 4861 # The current token might be multiple words 4862 curr = self._curr.text.upper() 4863 key = curr.split(" ") 4864 this.append(curr) 4865 4866 self._advance() 4867 result, trie = in_trie(trie, key) 4868 if result == TrieResult.FAILED: 4869 break 4870 4871 if result == TrieResult.EXISTS: 4872 subparser = parsers[" ".join(this)] 4873 return subparser 4874 4875 self._retreat(index) 4876 return None 4877 4878 def _match(self, token_type, advance=True, expression=None): 4879 if not self._curr: 4880 return None 4881 4882 if self._curr.token_type == token_type: 4883 if advance: 4884 self._advance() 4885 self._add_comments(expression) 4886 return True 4887 4888 return None 4889 4890 def _match_set(self, types, advance=True): 4891 if not self._curr: 4892 return None 4893 4894 if self._curr.token_type in types: 4895 if advance: 4896 self._advance() 4897 return True 4898 4899 return None 4900 4901 def _match_pair(self, token_type_a, token_type_b, advance=True): 4902 if not self._curr or not self._next: 4903 return None 4904 4905 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4906 if advance: 4907 self._advance(2) 4908 return True 4909 4910 return None 4911 4912 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4913 if not self._match(TokenType.L_PAREN, expression=expression): 4914 
self.raise_error("Expecting (") 4915 4916 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4917 if not self._match(TokenType.R_PAREN, expression=expression): 4918 self.raise_error("Expecting )") 4919 4920 def _match_texts(self, texts, advance=True): 4921 if self._curr and self._curr.text.upper() in texts: 4922 if advance: 4923 self._advance() 4924 return True 4925 return False 4926 4927 def _match_text_seq(self, *texts, advance=True): 4928 index = self._index 4929 for text in texts: 4930 if self._curr and self._curr.text.upper() == text: 4931 self._advance() 4932 else: 4933 self._retreat(index) 4934 return False 4935 4936 if not advance: 4937 self._retreat(index) 4938 4939 return True 4940 4941 @t.overload 4942 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4943 ... 4944 4945 @t.overload 4946 def _replace_columns_with_dots( 4947 self, this: t.Optional[exp.Expression] 4948 ) -> t.Optional[exp.Expression]: 4949 ... 4950 4951 def _replace_columns_with_dots(self, this): 4952 if isinstance(this, exp.Dot): 4953 exp.replace_children(this, self._replace_columns_with_dots) 4954 elif isinstance(this, exp.Column): 4955 exp.replace_children(this, self._replace_columns_with_dots) 4956 table = this.args.get("table") 4957 this = ( 4958 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4959 ) 4960 4961 return this 4962 4963 def _replace_lambda( 4964 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4965 ) -> t.Optional[exp.Expression]: 4966 if not node: 4967 return node 4968 4969 for column in node.find_all(exp.Column): 4970 if column.parts[0].name in lambda_variables: 4971 dot_or_id = column.to_dot() if column.table else column.this 4972 parent = column.parent 4973 4974 while isinstance(parent, exp.Dot): 4975 if not isinstance(parent.parent, exp.Dot): 4976 parent.replace(dot_or_id) 4977 break 4978 parent = parent.parent 4979 else: 4980 if column is node: 4981 node = 
dot_or_id 4982 else: 4983 column.replace(dot_or_id) 4984 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
872 def __init__( 873 self, 874 error_level: t.Optional[ErrorLevel] = None, 875 error_message_context: int = 100, 876 max_errors: int = 3, 877 ): 878 self.error_level = error_level or ErrorLevel.IMMEDIATE 879 self.error_message_context = error_message_context 880 self.max_errors = max_errors 881 self._tokenizer = self.TOKENIZER_CLASS() 882 self.reset()
894 def parse( 895 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 896 ) -> t.List[t.Optional[exp.Expression]]: 897 """ 898 Parses a list of tokens and returns a list of syntax trees, one tree 899 per parsed SQL statement. 900 901 Args: 902 raw_tokens: The list of tokens. 903 sql: The original SQL string, used to produce helpful debug messages. 904 905 Returns: 906 The list of the produced syntax trees. 907 """ 908 return self._parse( 909 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 910 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
912 def parse_into( 913 self, 914 expression_types: exp.IntoType, 915 raw_tokens: t.List[Token], 916 sql: t.Optional[str] = None, 917 ) -> t.List[t.Optional[exp.Expression]]: 918 """ 919 Parses a list of tokens into a given Expression type. If a collection of Expression 920 types is given instead, this method will try to parse the token list into each one 921 of them, stopping at the first for which the parsing succeeds. 922 923 Args: 924 expression_types: The expression type(s) to try and parse the token list into. 925 raw_tokens: The list of tokens. 926 sql: The original SQL string, used to produce helpful debug messages. 927 928 Returns: 929 The target Expression. 930 """ 931 errors = [] 932 for expression_type in ensure_list(expression_types): 933 parser = self.EXPRESSION_PARSERS.get(expression_type) 934 if not parser: 935 raise TypeError(f"No parser registered for {expression_type}") 936 937 try: 938 return self._parse(parser, raw_tokens, sql) 939 except ParseError as e: 940 e.errors[0]["into_expression"] = expression_type 941 errors.append(e) 942 943 raise ParseError( 944 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 945 errors=merge_errors(errors), 946 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
983 def check_errors(self) -> None: 984 """Logs or raises any found errors, depending on the chosen error level setting.""" 985 if self.error_level == ErrorLevel.WARN: 986 for error in self.errors: 987 logger.error(str(error)) 988 elif self.error_level == ErrorLevel.RAISE and self.errors: 989 raise ParseError( 990 concat_messages(self.errors, self.max_errors), 991 errors=merge_errors(self.errors), 992 )
Logs or raises any found errors, depending on the chosen error level setting.
994 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 995 """ 996 Appends an error in the list of recorded errors or raises it, depending on the chosen 997 error level setting. 998 """ 999 token = token or self._curr or self._prev or Token.string("") 1000 start = token.start 1001 end = token.end + 1 1002 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1003 highlight = self.sql[start:end] 1004 end_context = self.sql[end : end + self.error_message_context] 1005 1006 error = ParseError.new( 1007 f"{message}. Line {token.line}, Col: {token.col}.\n" 1008 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1009 description=message, 1010 line=token.line, 1011 col=token.col, 1012 start_context=start_context, 1013 highlight=highlight, 1014 end_context=end_context, 1015 ) 1016 1017 if self.error_level == ErrorLevel.IMMEDIATE: 1018 raise error 1019 1020 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1022 def expression( 1023 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1024 ) -> E: 1025 """ 1026 Creates a new, validated Expression. 1027 1028 Args: 1029 exp_class: The expression class to instantiate. 1030 comments: An optional list of comments to attach to the expression. 1031 kwargs: The arguments to set for the expression along with their respective values. 1032 1033 Returns: 1034 The target expression. 1035 """ 1036 instance = exp_class(**kwargs) 1037 instance.add_comments(comments) if comments else self._add_comments(instance) 1038 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1045 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1046 """ 1047 Validates an Expression, making sure that all its mandatory arguments are set. 1048 1049 Args: 1050 expression: The expression to validate. 1051 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1052 1053 Returns: 1054 The validated expression. 1055 """ 1056 if self.error_level != ErrorLevel.IGNORE: 1057 for error_message in expression.error_messages(args): 1058 self.raise_error(error_message) 1059 1060 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.