# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 NESTED_TYPE_TOKENS = { 106 TokenType.ARRAY, 107 TokenType.MAP, 108 TokenType.NULLABLE, 109 TokenType.STRUCT, 110 } 111 112 ENUM_TYPE_TOKENS = { 113 TokenType.ENUM, 114 } 115 116 TYPE_TOKENS = { 117 TokenType.BIT, 118 TokenType.BOOLEAN, 119 TokenType.TINYINT, 120 TokenType.UTINYINT, 121 TokenType.SMALLINT, 122 TokenType.USMALLINT, 123 TokenType.INT, 124 TokenType.UINT, 125 TokenType.BIGINT, 126 TokenType.UBIGINT, 127 TokenType.INT128, 128 TokenType.UINT128, 129 TokenType.INT256, 130 TokenType.UINT256, 131 TokenType.FLOAT, 132 TokenType.DOUBLE, 133 TokenType.CHAR, 134 TokenType.NCHAR, 135 
TokenType.VARCHAR, 136 TokenType.NVARCHAR, 137 TokenType.TEXT, 138 TokenType.MEDIUMTEXT, 139 TokenType.LONGTEXT, 140 TokenType.MEDIUMBLOB, 141 TokenType.LONGBLOB, 142 TokenType.BINARY, 143 TokenType.VARBINARY, 144 TokenType.JSON, 145 TokenType.JSONB, 146 TokenType.INTERVAL, 147 TokenType.TIME, 148 TokenType.TIMESTAMP, 149 TokenType.TIMESTAMPTZ, 150 TokenType.TIMESTAMPLTZ, 151 TokenType.DATETIME, 152 TokenType.DATETIME64, 153 TokenType.DATE, 154 TokenType.INT4RANGE, 155 TokenType.INT4MULTIRANGE, 156 TokenType.INT8RANGE, 157 TokenType.INT8MULTIRANGE, 158 TokenType.NUMRANGE, 159 TokenType.NUMMULTIRANGE, 160 TokenType.TSRANGE, 161 TokenType.TSMULTIRANGE, 162 TokenType.TSTZRANGE, 163 TokenType.TSTZMULTIRANGE, 164 TokenType.DATERANGE, 165 TokenType.DATEMULTIRANGE, 166 TokenType.DECIMAL, 167 TokenType.BIGDECIMAL, 168 TokenType.UUID, 169 TokenType.GEOGRAPHY, 170 TokenType.GEOMETRY, 171 TokenType.HLLSKETCH, 172 TokenType.HSTORE, 173 TokenType.PSEUDO_TYPE, 174 TokenType.SUPER, 175 TokenType.SERIAL, 176 TokenType.SMALLSERIAL, 177 TokenType.BIGSERIAL, 178 TokenType.XML, 179 TokenType.UNIQUEIDENTIFIER, 180 TokenType.USERDEFINED, 181 TokenType.MONEY, 182 TokenType.SMALLMONEY, 183 TokenType.ROWVERSION, 184 TokenType.IMAGE, 185 TokenType.VARIANT, 186 TokenType.OBJECT, 187 TokenType.INET, 188 TokenType.ENUM, 189 *NESTED_TYPE_TOKENS, 190 } 191 192 SUBQUERY_PREDICATES = { 193 TokenType.ANY: exp.Any, 194 TokenType.ALL: exp.All, 195 TokenType.EXISTS: exp.Exists, 196 TokenType.SOME: exp.Any, 197 } 198 199 RESERVED_KEYWORDS = { 200 *Tokenizer.SINGLE_TOKENS.values(), 201 TokenType.SELECT, 202 } 203 204 DB_CREATABLES = { 205 TokenType.DATABASE, 206 TokenType.SCHEMA, 207 TokenType.TABLE, 208 TokenType.VIEW, 209 TokenType.DICTIONARY, 210 } 211 212 CREATABLES = { 213 TokenType.COLUMN, 214 TokenType.FUNCTION, 215 TokenType.INDEX, 216 TokenType.PROCEDURE, 217 *DB_CREATABLES, 218 } 219 220 # Tokens that can represent identifiers 221 ID_VAR_TOKENS = { 222 TokenType.VAR, 223 TokenType.ANTI, 224 
TokenType.APPLY, 225 TokenType.ASC, 226 TokenType.AUTO_INCREMENT, 227 TokenType.BEGIN, 228 TokenType.CACHE, 229 TokenType.CASE, 230 TokenType.COLLATE, 231 TokenType.COMMAND, 232 TokenType.COMMENT, 233 TokenType.COMMIT, 234 TokenType.CONSTRAINT, 235 TokenType.DEFAULT, 236 TokenType.DELETE, 237 TokenType.DESC, 238 TokenType.DESCRIBE, 239 TokenType.DICTIONARY, 240 TokenType.DIV, 241 TokenType.END, 242 TokenType.EXECUTE, 243 TokenType.ESCAPE, 244 TokenType.FALSE, 245 TokenType.FIRST, 246 TokenType.FILTER, 247 TokenType.FORMAT, 248 TokenType.FULL, 249 TokenType.IF, 250 TokenType.IS, 251 TokenType.ISNULL, 252 TokenType.INTERVAL, 253 TokenType.KEEP, 254 TokenType.LEFT, 255 TokenType.LOAD, 256 TokenType.MERGE, 257 TokenType.NATURAL, 258 TokenType.NEXT, 259 TokenType.OFFSET, 260 TokenType.ORDINALITY, 261 TokenType.OVERWRITE, 262 TokenType.PARTITION, 263 TokenType.PERCENT, 264 TokenType.PIVOT, 265 TokenType.PRAGMA, 266 TokenType.RANGE, 267 TokenType.REFERENCES, 268 TokenType.RIGHT, 269 TokenType.ROW, 270 TokenType.ROWS, 271 TokenType.SEMI, 272 TokenType.SET, 273 TokenType.SETTINGS, 274 TokenType.SHOW, 275 TokenType.TEMPORARY, 276 TokenType.TOP, 277 TokenType.TRUE, 278 TokenType.UNIQUE, 279 TokenType.UNPIVOT, 280 TokenType.UPDATE, 281 TokenType.VOLATILE, 282 TokenType.WINDOW, 283 *CREATABLES, 284 *SUBQUERY_PREDICATES, 285 *TYPE_TOKENS, 286 *NO_PAREN_FUNCTIONS, 287 } 288 289 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 290 291 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 292 TokenType.APPLY, 293 TokenType.ASOF, 294 TokenType.FULL, 295 TokenType.LEFT, 296 TokenType.LOCK, 297 TokenType.NATURAL, 298 TokenType.OFFSET, 299 TokenType.RIGHT, 300 TokenType.WINDOW, 301 } 302 303 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 304 305 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 306 307 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 308 309 FUNC_TOKENS = { 310 TokenType.COMMAND, 311 TokenType.CURRENT_DATE, 312 TokenType.CURRENT_DATETIME, 313 
TokenType.CURRENT_TIMESTAMP, 314 TokenType.CURRENT_TIME, 315 TokenType.CURRENT_USER, 316 TokenType.FILTER, 317 TokenType.FIRST, 318 TokenType.FORMAT, 319 TokenType.GLOB, 320 TokenType.IDENTIFIER, 321 TokenType.INDEX, 322 TokenType.ISNULL, 323 TokenType.ILIKE, 324 TokenType.LIKE, 325 TokenType.MERGE, 326 TokenType.OFFSET, 327 TokenType.PRIMARY_KEY, 328 TokenType.RANGE, 329 TokenType.REPLACE, 330 TokenType.ROW, 331 TokenType.UNNEST, 332 TokenType.VAR, 333 TokenType.LEFT, 334 TokenType.RIGHT, 335 TokenType.DATE, 336 TokenType.DATETIME, 337 TokenType.TABLE, 338 TokenType.TIMESTAMP, 339 TokenType.TIMESTAMPTZ, 340 TokenType.WINDOW, 341 *TYPE_TOKENS, 342 *SUBQUERY_PREDICATES, 343 } 344 345 CONJUNCTION = { 346 TokenType.AND: exp.And, 347 TokenType.OR: exp.Or, 348 } 349 350 EQUALITY = { 351 TokenType.EQ: exp.EQ, 352 TokenType.NEQ: exp.NEQ, 353 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 354 } 355 356 COMPARISON = { 357 TokenType.GT: exp.GT, 358 TokenType.GTE: exp.GTE, 359 TokenType.LT: exp.LT, 360 TokenType.LTE: exp.LTE, 361 } 362 363 BITWISE = { 364 TokenType.AMP: exp.BitwiseAnd, 365 TokenType.CARET: exp.BitwiseXor, 366 TokenType.PIPE: exp.BitwiseOr, 367 TokenType.DPIPE: exp.DPipe, 368 } 369 370 TERM = { 371 TokenType.DASH: exp.Sub, 372 TokenType.PLUS: exp.Add, 373 TokenType.MOD: exp.Mod, 374 TokenType.COLLATE: exp.Collate, 375 } 376 377 FACTOR = { 378 TokenType.DIV: exp.IntDiv, 379 TokenType.LR_ARROW: exp.Distance, 380 TokenType.SLASH: exp.Div, 381 TokenType.STAR: exp.Mul, 382 } 383 384 TIMESTAMPS = { 385 TokenType.TIME, 386 TokenType.TIMESTAMP, 387 TokenType.TIMESTAMPTZ, 388 TokenType.TIMESTAMPLTZ, 389 } 390 391 SET_OPERATIONS = { 392 TokenType.UNION, 393 TokenType.INTERSECT, 394 TokenType.EXCEPT, 395 } 396 397 JOIN_METHODS = { 398 TokenType.NATURAL, 399 TokenType.ASOF, 400 } 401 402 JOIN_SIDES = { 403 TokenType.LEFT, 404 TokenType.RIGHT, 405 TokenType.FULL, 406 } 407 408 JOIN_KINDS = { 409 TokenType.INNER, 410 TokenType.OUTER, 411 TokenType.CROSS, 412 TokenType.SEMI, 413 
TokenType.ANTI, 414 } 415 416 JOIN_HINTS: t.Set[str] = set() 417 418 LAMBDAS = { 419 TokenType.ARROW: lambda self, expressions: self.expression( 420 exp.Lambda, 421 this=self._replace_lambda( 422 self._parse_conjunction(), 423 {node.name for node in expressions}, 424 ), 425 expressions=expressions, 426 ), 427 TokenType.FARROW: lambda self, expressions: self.expression( 428 exp.Kwarg, 429 this=exp.var(expressions[0].name), 430 expression=self._parse_conjunction(), 431 ), 432 } 433 434 COLUMN_OPERATORS = { 435 TokenType.DOT: None, 436 TokenType.DCOLON: lambda self, this, to: self.expression( 437 exp.Cast if self.STRICT_CAST else exp.TryCast, 438 this=this, 439 to=to, 440 ), 441 TokenType.ARROW: lambda self, this, path: self.expression( 442 exp.JSONExtract, 443 this=this, 444 expression=path, 445 ), 446 TokenType.DARROW: lambda self, this, path: self.expression( 447 exp.JSONExtractScalar, 448 this=this, 449 expression=path, 450 ), 451 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 452 exp.JSONBExtract, 453 this=this, 454 expression=path, 455 ), 456 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 457 exp.JSONBExtractScalar, 458 this=this, 459 expression=path, 460 ), 461 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 462 exp.JSONBContains, 463 this=this, 464 expression=key, 465 ), 466 } 467 468 EXPRESSION_PARSERS = { 469 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 470 exp.Column: lambda self: self._parse_column(), 471 exp.Condition: lambda self: self._parse_conjunction(), 472 exp.DataType: lambda self: self._parse_types(), 473 exp.Expression: lambda self: self._parse_statement(), 474 exp.From: lambda self: self._parse_from(), 475 exp.Group: lambda self: self._parse_group(), 476 exp.Having: lambda self: self._parse_having(), 477 exp.Identifier: lambda self: self._parse_id_var(), 478 exp.Join: lambda self: self._parse_join(), 479 exp.Lambda: lambda self: self._parse_lambda(), 480 
exp.Lateral: lambda self: self._parse_lateral(), 481 exp.Limit: lambda self: self._parse_limit(), 482 exp.Offset: lambda self: self._parse_offset(), 483 exp.Order: lambda self: self._parse_order(), 484 exp.Ordered: lambda self: self._parse_ordered(), 485 exp.Properties: lambda self: self._parse_properties(), 486 exp.Qualify: lambda self: self._parse_qualify(), 487 exp.Returning: lambda self: self._parse_returning(), 488 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 489 exp.Table: lambda self: self._parse_table_parts(), 490 exp.TableAlias: lambda self: self._parse_table_alias(), 491 exp.Where: lambda self: self._parse_where(), 492 exp.Window: lambda self: self._parse_named_window(), 493 exp.With: lambda self: self._parse_with(), 494 "JOIN_TYPE": lambda self: self._parse_join_parts(), 495 } 496 497 STATEMENT_PARSERS = { 498 TokenType.ALTER: lambda self: self._parse_alter(), 499 TokenType.BEGIN: lambda self: self._parse_transaction(), 500 TokenType.CACHE: lambda self: self._parse_cache(), 501 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 502 TokenType.COMMENT: lambda self: self._parse_comment(), 503 TokenType.CREATE: lambda self: self._parse_create(), 504 TokenType.DELETE: lambda self: self._parse_delete(), 505 TokenType.DESC: lambda self: self._parse_describe(), 506 TokenType.DESCRIBE: lambda self: self._parse_describe(), 507 TokenType.DROP: lambda self: self._parse_drop(), 508 TokenType.END: lambda self: self._parse_commit_or_rollback(), 509 TokenType.FROM: lambda self: exp.select("*").from_( 510 t.cast(exp.From, self._parse_from(skip_from_token=True)) 511 ), 512 TokenType.INSERT: lambda self: self._parse_insert(), 513 TokenType.LOAD: lambda self: self._parse_load(), 514 TokenType.MERGE: lambda self: self._parse_merge(), 515 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 516 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 517 TokenType.ROLLBACK: lambda self: 
self._parse_commit_or_rollback(), 518 TokenType.SET: lambda self: self._parse_set(), 519 TokenType.UNCACHE: lambda self: self._parse_uncache(), 520 TokenType.UPDATE: lambda self: self._parse_update(), 521 TokenType.USE: lambda self: self.expression( 522 exp.Use, 523 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 524 and exp.var(self._prev.text), 525 this=self._parse_table(schema=False), 526 ), 527 } 528 529 UNARY_PARSERS = { 530 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 531 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 532 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 533 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 534 } 535 536 PRIMARY_PARSERS = { 537 TokenType.STRING: lambda self, token: self.expression( 538 exp.Literal, this=token.text, is_string=True 539 ), 540 TokenType.NUMBER: lambda self, token: self.expression( 541 exp.Literal, this=token.text, is_string=False 542 ), 543 TokenType.STAR: lambda self, _: self.expression( 544 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 545 ), 546 TokenType.NULL: lambda self, _: self.expression(exp.Null), 547 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 548 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 549 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 550 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 551 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 552 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 553 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 554 exp.National, this=token.text 555 ), 556 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 557 
TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 558 } 559 560 PLACEHOLDER_PARSERS = { 561 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 562 TokenType.PARAMETER: lambda self: self._parse_parameter(), 563 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 564 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 565 else None, 566 } 567 568 RANGE_PARSERS = { 569 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 570 TokenType.GLOB: binary_range_parser(exp.Glob), 571 TokenType.ILIKE: binary_range_parser(exp.ILike), 572 TokenType.IN: lambda self, this: self._parse_in(this), 573 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 574 TokenType.IS: lambda self, this: self._parse_is(this), 575 TokenType.LIKE: binary_range_parser(exp.Like), 576 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 577 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 578 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 579 } 580 581 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 582 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 583 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 584 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 585 "CHARACTER SET": lambda self: self._parse_character_set(), 586 "CHECKSUM": lambda self: self._parse_checksum(), 587 "CLUSTER BY": lambda self: self._parse_cluster(), 588 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 589 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 590 "COPY": lambda self: self._parse_copy_property(), 591 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 592 "DEFINER": lambda self: self._parse_definer(), 593 "DETERMINISTIC": lambda self: self.expression( 594 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 595 ), 596 
"DISTKEY": lambda self: self._parse_distkey(), 597 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 598 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 599 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 600 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 601 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 602 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 603 "FREESPACE": lambda self: self._parse_freespace(), 604 "IMMUTABLE": lambda self: self.expression( 605 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 606 ), 607 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 608 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 609 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 610 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 611 "LIKE": lambda self: self._parse_create_like(), 612 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 613 "LOCK": lambda self: self._parse_locking(), 614 "LOCKING": lambda self: self._parse_locking(), 615 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 616 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 617 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 618 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 619 "NO": lambda self: self._parse_no_property(), 620 "ON": lambda self: self._parse_on_property(), 621 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 622 "PARTITION BY": lambda self: self._parse_partitioned_by(), 623 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 624 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 625 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 626 "RANGE": lambda self: 
self._parse_dict_range(this="RANGE"), 627 "RETURNS": lambda self: self._parse_returns(), 628 "ROW": lambda self: self._parse_row(), 629 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 630 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 631 "SETTINGS": lambda self: self.expression( 632 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 633 ), 634 "SORTKEY": lambda self: self._parse_sortkey(), 635 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 636 "STABLE": lambda self: self.expression( 637 exp.StabilityProperty, this=exp.Literal.string("STABLE") 638 ), 639 "STORED": lambda self: self._parse_stored(), 640 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 641 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 642 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 643 "TO": lambda self: self._parse_to_table(), 644 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 645 "TTL": lambda self: self._parse_ttl(), 646 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 647 "VOLATILE": lambda self: self._parse_volatile_property(), 648 "WITH": lambda self: self._parse_with_property(), 649 } 650 651 CONSTRAINT_PARSERS = { 652 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 653 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 654 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 655 "CHARACTER SET": lambda self: self.expression( 656 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 657 ), 658 "CHECK": lambda self: self.expression( 659 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 660 ), 661 "COLLATE": lambda self: self.expression( 662 exp.CollateColumnConstraint, this=self._parse_var() 663 ), 664 "COMMENT": lambda self: self.expression( 665 exp.CommentColumnConstraint, 
this=self._parse_string() 666 ), 667 "COMPRESS": lambda self: self._parse_compress(), 668 "DEFAULT": lambda self: self.expression( 669 exp.DefaultColumnConstraint, this=self._parse_bitwise() 670 ), 671 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 672 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 673 "FORMAT": lambda self: self.expression( 674 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 675 ), 676 "GENERATED": lambda self: self._parse_generated_as_identity(), 677 "IDENTITY": lambda self: self._parse_auto_increment(), 678 "INLINE": lambda self: self._parse_inline(), 679 "LIKE": lambda self: self._parse_create_like(), 680 "NOT": lambda self: self._parse_not_constraint(), 681 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 682 "ON": lambda self: self._match(TokenType.UPDATE) 683 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 684 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 685 "PRIMARY KEY": lambda self: self._parse_primary_key(), 686 "REFERENCES": lambda self: self._parse_references(match=False), 687 "TITLE": lambda self: self.expression( 688 exp.TitleColumnConstraint, this=self._parse_var_or_string() 689 ), 690 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 691 "UNIQUE": lambda self: self._parse_unique(), 692 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 693 } 694 695 ALTER_PARSERS = { 696 "ADD": lambda self: self._parse_alter_table_add(), 697 "ALTER": lambda self: self._parse_alter_table_alter(), 698 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 699 "DROP": lambda self: self._parse_alter_table_drop(), 700 "RENAME": lambda self: self._parse_alter_table_rename(), 701 } 702 703 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 704 705 
NO_PAREN_FUNCTION_PARSERS = { 706 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 707 TokenType.CASE: lambda self: self._parse_case(), 708 TokenType.IF: lambda self: self._parse_if(), 709 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 710 exp.NextValueFor, 711 this=self._parse_column(), 712 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 713 ), 714 } 715 716 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 717 718 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 719 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 720 "CONCAT": lambda self: self._parse_concat(), 721 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 722 "DECODE": lambda self: self._parse_decode(), 723 "EXTRACT": lambda self: self._parse_extract(), 724 "JSON_OBJECT": lambda self: self._parse_json_object(), 725 "LOG": lambda self: self._parse_logarithm(), 726 "MATCH": lambda self: self._parse_match_against(), 727 "OPENJSON": lambda self: self._parse_open_json(), 728 "POSITION": lambda self: self._parse_position(), 729 "SAFE_CAST": lambda self: self._parse_cast(False), 730 "STRING_AGG": lambda self: self._parse_string_agg(), 731 "SUBSTRING": lambda self: self._parse_substring(), 732 "TRIM": lambda self: self._parse_trim(), 733 "TRY_CAST": lambda self: self._parse_cast(False), 734 "TRY_CONVERT": lambda self: self._parse_convert(False), 735 } 736 737 QUERY_MODIFIER_PARSERS = { 738 "joins": lambda self: list(iter(self._parse_join, None)), 739 "laterals": lambda self: list(iter(self._parse_lateral, None)), 740 "match": lambda self: self._parse_match_recognize(), 741 "where": lambda self: self._parse_where(), 742 "group": lambda self: self._parse_group(), 743 "having": lambda self: self._parse_having(), 744 "qualify": lambda self: self._parse_qualify(), 745 "windows": lambda self: self._parse_window_clause(), 746 "order": lambda self: self._parse_order(), 747 "limit": lambda self: self._parse_limit(), 748 "offset": lambda 
self: self._parse_offset(), 749 "locks": lambda self: self._parse_locks(), 750 "sample": lambda self: self._parse_table_sample(as_modifier=True), 751 } 752 753 SET_PARSERS = { 754 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 755 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 756 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 757 "TRANSACTION": lambda self: self._parse_set_transaction(), 758 } 759 760 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 761 762 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 763 764 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 765 766 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 767 768 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 769 770 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 771 TRANSACTION_CHARACTERISTICS = { 772 "ISOLATION LEVEL REPEATABLE READ", 773 "ISOLATION LEVEL READ COMMITTED", 774 "ISOLATION LEVEL READ UNCOMMITTED", 775 "ISOLATION LEVEL SERIALIZABLE", 776 "READ WRITE", 777 "READ ONLY", 778 } 779 780 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 781 782 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 783 784 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 785 786 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 787 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 788 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 789 790 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 791 792 STRICT_CAST = True 793 794 # A NULL arg in CONCAT yields NULL by default 795 CONCAT_NULL_OUTPUTS_STRING = False 796 797 CONVERT_TYPE_FIRST = False 798 799 PREFIXED_PIVOT_COLUMNS = False 800 IDENTIFY_PIVOT_STRINGS = False 801 802 LOG_BASE_FIRST = True 803 LOG_DEFAULTS_TO_LN = False 804 805 __slots__ = ( 806 "error_level", 807 "error_message_context", 808 "max_errors", 809 "sql", 810 "errors", 
811 "_tokens", 812 "_index", 813 "_curr", 814 "_next", 815 "_prev", 816 "_prev_comments", 817 ) 818 819 # Autofilled 820 INDEX_OFFSET: int = 0 821 UNNEST_COLUMN_ONLY: bool = False 822 ALIAS_POST_TABLESAMPLE: bool = False 823 STRICT_STRING_CONCAT = False 824 NULL_ORDERING: str = "nulls_are_small" 825 SHOW_TRIE: t.Dict = {} 826 SET_TRIE: t.Dict = {} 827 FORMAT_MAPPING: t.Dict[str, str] = {} 828 FORMAT_TRIE: t.Dict = {} 829 TIME_MAPPING: t.Dict[str, str] = {} 830 TIME_TRIE: t.Dict = {} 831 832 def __init__( 833 self, 834 error_level: t.Optional[ErrorLevel] = None, 835 error_message_context: int = 100, 836 max_errors: int = 3, 837 ): 838 self.error_level = error_level or ErrorLevel.IMMEDIATE 839 self.error_message_context = error_message_context 840 self.max_errors = max_errors 841 self.reset() 842 843 def reset(self): 844 self.sql = "" 845 self.errors = [] 846 self._tokens = [] 847 self._index = 0 848 self._curr = None 849 self._next = None 850 self._prev = None 851 self._prev_comments = None 852 853 def parse( 854 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 855 ) -> t.List[t.Optional[exp.Expression]]: 856 """ 857 Parses a list of tokens and returns a list of syntax trees, one tree 858 per parsed SQL statement. 859 860 Args: 861 raw_tokens: The list of tokens. 862 sql: The original SQL string, used to produce helpful debug messages. 863 864 Returns: 865 The list of the produced syntax trees. 866 """ 867 return self._parse( 868 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 869 ) 870 871 def parse_into( 872 self, 873 expression_types: exp.IntoType, 874 raw_tokens: t.List[Token], 875 sql: t.Optional[str] = None, 876 ) -> t.List[t.Optional[exp.Expression]]: 877 """ 878 Parses a list of tokens into a given Expression type. If a collection of Expression 879 types is given instead, this method will try to parse the token list into each one 880 of them, stopping at the first for which the parsing succeeds. 
881 882 Args: 883 expression_types: The expression type(s) to try and parse the token list into. 884 raw_tokens: The list of tokens. 885 sql: The original SQL string, used to produce helpful debug messages. 886 887 Returns: 888 The target Expression. 889 """ 890 errors = [] 891 for expression_type in ensure_list(expression_types): 892 parser = self.EXPRESSION_PARSERS.get(expression_type) 893 if not parser: 894 raise TypeError(f"No parser registered for {expression_type}") 895 896 try: 897 return self._parse(parser, raw_tokens, sql) 898 except ParseError as e: 899 e.errors[0]["into_expression"] = expression_type 900 errors.append(e) 901 902 raise ParseError( 903 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 904 errors=merge_errors(errors), 905 ) from errors[-1] 906 907 def _parse( 908 self, 909 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 910 raw_tokens: t.List[Token], 911 sql: t.Optional[str] = None, 912 ) -> t.List[t.Optional[exp.Expression]]: 913 self.reset() 914 self.sql = sql or "" 915 916 total = len(raw_tokens) 917 chunks: t.List[t.List[Token]] = [[]] 918 919 for i, token in enumerate(raw_tokens): 920 if token.token_type == TokenType.SEMICOLON: 921 if i < total - 1: 922 chunks.append([]) 923 else: 924 chunks[-1].append(token) 925 926 expressions = [] 927 928 for tokens in chunks: 929 self._index = -1 930 self._tokens = tokens 931 self._advance() 932 933 expressions.append(parse_method(self)) 934 935 if self._index < len(self._tokens): 936 self.raise_error("Invalid expression / Unexpected token") 937 938 self.check_errors() 939 940 return expressions 941 942 def check_errors(self) -> None: 943 """Logs or raises any found errors, depending on the chosen error level setting.""" 944 if self.error_level == ErrorLevel.WARN: 945 for error in self.errors: 946 logger.error(str(error)) 947 elif self.error_level == ErrorLevel.RAISE and self.errors: 948 raise ParseError( 949 concat_messages(self.errors, self.max_errors), 950 
errors=merge_errors(self.errors), 951 ) 952 953 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 954 """ 955 Appends an error in the list of recorded errors or raises it, depending on the chosen 956 error level setting. 957 """ 958 token = token or self._curr or self._prev or Token.string("") 959 start = token.start 960 end = token.end + 1 961 start_context = self.sql[max(start - self.error_message_context, 0) : start] 962 highlight = self.sql[start:end] 963 end_context = self.sql[end : end + self.error_message_context] 964 965 error = ParseError.new( 966 f"{message}. Line {token.line}, Col: {token.col}.\n" 967 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 968 description=message, 969 line=token.line, 970 col=token.col, 971 start_context=start_context, 972 highlight=highlight, 973 end_context=end_context, 974 ) 975 976 if self.error_level == ErrorLevel.IMMEDIATE: 977 raise error 978 979 self.errors.append(error) 980 981 def expression( 982 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 983 ) -> E: 984 """ 985 Creates a new, validated Expression. 986 987 Args: 988 exp_class: The expression class to instantiate. 989 comments: An optional list of comments to attach to the expression. 990 kwargs: The arguments to set for the expression along with their respective values. 991 992 Returns: 993 The target expression. 994 """ 995 instance = exp_class(**kwargs) 996 instance.add_comments(comments) if comments else self._add_comments(instance) 997 return self.validate_expression(instance) 998 999 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1000 if expression and self._prev_comments: 1001 expression.add_comments(self._prev_comments) 1002 self._prev_comments = None 1003 1004 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1005 """ 1006 Validates an Expression, making sure that all its mandatory arguments are set. 
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL text spanned by the two tokens (end inclusive).
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Move the token cursor forward and refresh the cached current/next/prev tokens
        # plus any comments carried by the previous token.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewind (or fast-forward) the cursor to an absolute token index.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback path: wrap the previous token's text plus the remaining statement
        # as an opaque Command node.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse COMMENT [IF EXISTS] ON <kind> <target> IS <string>."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: degrade gracefully to an opaque command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )
    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        # Parses a (possibly schema-qualified) table reference as a TO-table property.
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause (actions, WHERE, GROUP BY, SET aggregates)."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            # Bare expression with no action keyword.
            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level statement dispatch: registered statement parsers, then raw
        commands, then a plain expression/select fallback."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
    def _parse_drop(self) -> exp.Drop | exp.Command:
        """Parse DROP [TEMPORARY] [MATERIALIZED] <kind> ... with optional trailing
        CASCADE / CONSTRAINTS / PURGE modifiers."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown object kind: fall back to an opaque command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; short-circuits so tokens are only consumed in order.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE statement (table, view, function/procedure, index, ...),
        accumulating properties from every dialect-specific position."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION is matched below.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Merge properties parsed at different clause positions into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            # Snowflake-style CLONE with optional AT/BEFORE time-travel qualifier.
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the qualifier flags that were actually present.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The matched property parser doesn't accept these qualifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/statement property; returns None when no property matches."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property assignment (key may be a VAR or a string).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse STORED AS ..., including Hive's INPUTFORMAT/OUTPUTFORMAT pair."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # Accepts both `PROP = value` and `PROP AS value` forms.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Collect consecutive properties into a single exp.Properties node, or None."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )
    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: a table property (e.g. Teradata) vs a function
        stability marker, based on the token two places back."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        """Parse the various WITH-prefixed property forms (wrapped list, JOURNAL,
        [NO] DATA, isolated loading)."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)
    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        # Qualifier flags (no/dual/before/...) arrive via kwargs from _parse_property_before.
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = ON | OFF | DEFAULT; `on` stays None when unspecified."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # COPY must be followed by GRANTS; otherwise back off the COPY token.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        # With `=` the ratio is an explicit number; otherwise only the flags apply.
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE [= n [BYTES|KBYTES|KILOBYTES]] with optional qualifiers."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )
    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP(...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a Teradata LOCKING clause: target kind + object, FOR/IN, lock type,
        optional OVERRIDE. Every piece is optional and parsed in order."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks have no named object to parse.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        # Returns an empty list (not None) when there is no PARTITION BY clause.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS]; statistics stays None when absent."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                # A dangling INCLUDING/EXCLUDING makes the whole clause invalid.
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)
    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: either a scalar type or TABLE [<...>] / TABLE(...)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> generic-style schema.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        """Parse INSERT [OVERWRITE] [LOCAL DIRECTORY ... | [OR <alt>] INTO <table>] ...."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive-style INSERT ... DIRECTORY target.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. sqlite INSERT OR REPLACE/IGNORE/...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (Postgres/SQLite) or ON DUPLICATE KEY (MySQL) clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # ROW was already consumed by the caller; require FORMAT next.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()
    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive ROW FORMAT SERDE '<serde>' or ROW FORMAT DELIMITED <specs>."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional, but order is fixed.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive LOAD DATA [LOCAL] INPATH ... [OVERWRITE] INTO TABLE ...;
        anything else after LOAD falls back to an opaque command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
    def _parse_delete(self) -> exp.Delete:
        # DELETE was already consumed; FROM is optional in some dialects.
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse Spark's CACHE [LAZY] TABLE <t> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # Single key/value pair, wrapped in parens.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )
    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row: a parenthesized tuple or a single bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Parse a SELECT-like expression: an optional leading CTE, a SELECT body,
        a parenthesized subquery (when nested/table), or a VALUES clause.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # BigQuery-style SELECT AS STRUCT / AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse WITH [RECURSIVE] cte [, cte ...]; returns None when there is no WITH."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] alias [(col, ...)]; returns None when neither an alias nor
        a column list is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If the parens didn't contain columns, rewind past the L_PAREN.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, where, group, limit, ...) to a
        modifiable expression; non-modifiables pass through untouched."""
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        # A LIMIT x, y form carries the offset inside the limit node;
                        # hoist it onto the query itself.
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        # Oracle-style /*+ ... */ hint comments surface as HINT tokens.
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )
unlogged = self._match_text_seq("UNLOGGED") 2040 self._match(TokenType.TABLE) 2041 2042 return self.expression( 2043 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2044 ) 2045 2046 def _parse_from( 2047 self, modifiers: bool = False, skip_from_token: bool = False 2048 ) -> t.Optional[exp.From]: 2049 if not skip_from_token and not self._match(TokenType.FROM): 2050 return None 2051 2052 comments = self._prev_comments 2053 this = self._parse_table() 2054 2055 return self.expression( 2056 exp.From, 2057 comments=comments, 2058 this=self._parse_query_modifiers(this) if modifiers else this, 2059 ) 2060 2061 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2062 if not self._match(TokenType.MATCH_RECOGNIZE): 2063 return None 2064 2065 self._match_l_paren() 2066 2067 partition = self._parse_partition_by() 2068 order = self._parse_order() 2069 measures = ( 2070 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2071 ) 2072 2073 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2074 rows = exp.var("ONE ROW PER MATCH") 2075 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2076 text = "ALL ROWS PER MATCH" 2077 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2078 text += f" SHOW EMPTY MATCHES" 2079 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2080 text += f" OMIT EMPTY MATCHES" 2081 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2082 text += f" WITH UNMATCHED ROWS" 2083 rows = exp.var(text) 2084 else: 2085 rows = None 2086 2087 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2088 text = "AFTER MATCH SKIP" 2089 if self._match_text_seq("PAST", "LAST", "ROW"): 2090 text += f" PAST LAST ROW" 2091 elif self._match_text_seq("TO", "NEXT", "ROW"): 2092 text += f" TO NEXT ROW" 2093 elif self._match_text_seq("TO", "FIRST"): 2094 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2095 elif self._match_text_seq("TO", "LAST"): 2096 text += f" TO LAST 
{self._advance_any().text}" # type: ignore 2097 after = exp.var(text) 2098 else: 2099 after = None 2100 2101 if self._match_text_seq("PATTERN"): 2102 self._match_l_paren() 2103 2104 if not self._curr: 2105 self.raise_error("Expecting )", self._curr) 2106 2107 paren = 1 2108 start = self._curr 2109 2110 while self._curr and paren > 0: 2111 if self._curr.token_type == TokenType.L_PAREN: 2112 paren += 1 2113 if self._curr.token_type == TokenType.R_PAREN: 2114 paren -= 1 2115 2116 end = self._prev 2117 self._advance() 2118 2119 if paren > 0: 2120 self.raise_error("Expecting )", self._curr) 2121 2122 pattern = exp.var(self._find_sql(start, end)) 2123 else: 2124 pattern = None 2125 2126 define = ( 2127 self._parse_csv( 2128 lambda: self.expression( 2129 exp.Alias, 2130 alias=self._parse_id_var(any_token=True), 2131 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2132 ) 2133 ) 2134 if self._match_text_seq("DEFINE") 2135 else None 2136 ) 2137 2138 self._match_r_paren() 2139 2140 return self.expression( 2141 exp.MatchRecognize, 2142 partition_by=partition, 2143 order=order, 2144 measures=measures, 2145 rows=rows, 2146 after=after, 2147 pattern=pattern, 2148 define=define, 2149 alias=self._parse_table_alias(), 2150 ) 2151 2152 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2153 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2154 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2155 2156 if outer_apply or cross_apply: 2157 this = self._parse_select(table=True) 2158 view = None 2159 outer = not cross_apply 2160 elif self._match(TokenType.LATERAL): 2161 this = self._parse_select(table=True) 2162 view = self._match(TokenType.VIEW) 2163 outer = self._match(TokenType.OUTER) 2164 else: 2165 return None 2166 2167 if not this: 2168 this = self._parse_function() or self._parse_id_var(any_token=False) 2169 while self._match(TokenType.DOT): 2170 this = exp.Dot( 2171 this=this, 2172 expression=self._parse_function() or 
        # (continuation of _parse_lateral) Resolve the alias for the lateral source.
        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join prefix, each possibly None."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        """Parse a JOIN (including comma joins and CROSS/OUTER APPLY) into an `exp.Join`."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join: rewind and drop anything we speculatively matched
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT join
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition into an `exp.Index`.

        When `index` is given (the name was already parsed), expect `ON table`;
        otherwise parse `[UNIQUE] [PRIMARY] [AMP] INDEX name`.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH(...) or MySQL USE/FORCE/IGNORE INDEX table hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table reference (function call, id, string, or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse `[catalog.][db.]table[.more...]` into an `exp.Table` node."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like source: lateral, unnest, VALUES, subquery, or a table name.

        Aliases, pivots, hints and TABLESAMPLE are attached; TABLESAMPLE placement
        relative to the alias is dialect-dependent (ALIAS_POST_TABLESAMPLE).
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table so it renders around the whole reference
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] name]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            # In column-only dialects (e.g. BigQuery) the alias names the column, not the table
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES list, optionally parenthesized as a derived table `(VALUES ...)`."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        # For the derived form the alias may also appear after the closing paren
        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
self._parse_table_alias() 2425 ) 2426 2427 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2428 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2429 as_modifier and self._match_text_seq("USING", "SAMPLE") 2430 ): 2431 return None 2432 2433 bucket_numerator = None 2434 bucket_denominator = None 2435 bucket_field = None 2436 percent = None 2437 rows = None 2438 size = None 2439 seed = None 2440 2441 kind = ( 2442 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2443 ) 2444 method = self._parse_var(tokens=(TokenType.ROW,)) 2445 2446 self._match(TokenType.L_PAREN) 2447 2448 num = self._parse_number() 2449 2450 if self._match_text_seq("BUCKET"): 2451 bucket_numerator = self._parse_number() 2452 self._match_text_seq("OUT", "OF") 2453 bucket_denominator = bucket_denominator = self._parse_number() 2454 self._match(TokenType.ON) 2455 bucket_field = self._parse_field() 2456 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2457 percent = num 2458 elif self._match(TokenType.ROWS): 2459 rows = num 2460 else: 2461 size = num 2462 2463 self._match(TokenType.R_PAREN) 2464 2465 if self._match(TokenType.L_PAREN): 2466 method = self._parse_var() 2467 seed = self._match(TokenType.COMMA) and self._parse_number() 2468 self._match_r_paren() 2469 elif self._match_texts(("SEED", "REPEATABLE")): 2470 seed = self._parse_wrapped(self._parse_number) 2471 2472 return self.expression( 2473 exp.TableSample, 2474 method=method, 2475 bucket_numerator=bucket_numerator, 2476 bucket_denominator=bucket_denominator, 2477 bucket_field=bucket_field, 2478 percent=percent, 2479 rows=rows, 2480 size=size, 2481 seed=seed, 2482 kind=kind, 2483 ) 2484 2485 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2486 return list(iter(self._parse_pivot, None)) 2487 2488 # https://duckdb.org/docs/sql/statements/pivot 2489 def _parse_simplified_pivot(self) -> exp.Pivot: 2490 def _parse_on() -> 
            # (body of _parse_on in _parse_simplified_pivot)
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard PIVOT/UNPIVOT clause into an `exp.Pivot` node.

        For PIVOT, the output column names are synthesized from the aggregation
        aliases and the IN-list field names (order controlled by PREFIXED_PIVOT_COLUMNS).
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT keyword without a paren — not a pivot clause, rewind
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may take an alias
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; returns None if the keyword is absent (unless skipped)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, accumulating expressions, GROUPING SETS, ROLLUP, CUBE and
        WITH TOTALS across repeated passes until no grouping construct matches."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; bare ROLLUP takes a wrapped one
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse `GROUPING SETS (...)`; returns None if the keyword is absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized tuple of columns or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; returns None if the keyword is absent (unless skipped)."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause; returns None if the keyword is absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; passes `this` through unchanged when no ORDER BY follows."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a SORT/CLUSTER/DISTRIBUTE-style clause introduced by `token` into `exp_class`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ORDER BY term with ASC/DESC and NULLS FIRST/LAST handling.

        When nulls ordering is not explicit, `nulls_first` is derived from the
        dialect's NULL_ORDERING setting so transpilation stays deterministic.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True
        # (continuation of _parse_ordered)
        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top`), including `LIMIT offset, count` and FETCH syntax.

        Passes `this` through unchanged when neither construct is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style `LIMIT offset, count`
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse `OFFSET n [ROW | ROWS]`; passes `this` through unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE,
        each with optional OF tables and NOWAIT / WAIT n / SKIP LOCKED."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT operators, right-associatively."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains over equality expressions."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse =/<>-style equality chains over comparisons."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse </>/<=/>= chains over range expressions."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: BETWEEN/IN/LIKE/etc., [NOT], ISNULL/NOTNULL, IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all — rewind past IS (and NOT)
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the operand of IN: an UNNEST, a parenthesized list/subquery, or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse `low AND high` following BETWEEN."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `ESCAPE 'char'` suffix (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing `INTERVAL '5 day'` to `INTERVAL '5' day`."""
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as paired </> tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                # (continuation of _parse_bitwise) << and >> shift operators
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse +/- chains over factors."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse * / % chains over unary expressions."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator, or a typed/column expression with AT TIME ZONE."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL, a `TYPE 'literal'` cast shorthand, or a plain column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' — a typed literal becomes a cast
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name not followed by a literal: treat it as a column instead
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse one type-size argument, e.g. the `10` or `10 CHAR` in VARCHAR(10 CHAR)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY/MAP/STRUCT), sized, enum,
        bracketed-array, timezone-qualified and INTERVAL forms.

        When `check_func` is True, a parenthesized form that could also be a function
        call (e.g. DATE(...)) is rejected unless followed by a string literal.
        Returns None (after rewinding) when no valid type is found.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Parenthesized form might actually be a function call — decided below
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array suffix: int[], int[][], ...
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # `type[` without a closing bracket is not a type here (likely indexing)
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal: this was a function call, not a type
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: `name [:] type [constraints]`."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `AT TIME ZONE zone` suffix."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference (with dots, casts and brackets applied on top)."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
_parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3082 this = self._parse_bracket(this) 3083 3084 while self._match_set(self.COLUMN_OPERATORS): 3085 op_token = self._prev.token_type 3086 op = self.COLUMN_OPERATORS.get(op_token) 3087 3088 if op_token == TokenType.DCOLON: 3089 field = self._parse_types() 3090 if not field: 3091 self.raise_error("Expected type") 3092 elif op and self._curr: 3093 self._advance() 3094 value = self._prev.text 3095 field = ( 3096 exp.Literal.number(value) 3097 if self._prev.token_type == TokenType.NUMBER 3098 else exp.Literal.string(value) 3099 ) 3100 else: 3101 field = self._parse_field(anonymous_func=True, any_token=True) 3102 3103 if isinstance(field, exp.Func): 3104 # bigquery allows function calls like x.y.count(...) 3105 # SAFE.SUBSTR(...) 3106 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3107 this = self._replace_columns_with_dots(this) 3108 3109 if op: 3110 this = op(self, this, field) 3111 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3112 this = self.expression( 3113 exp.Column, 3114 this=field, 3115 table=this.this, 3116 db=this.args.get("table"), 3117 catalog=this.args.get("db"), 3118 ) 3119 else: 3120 this = self.expression(exp.Dot, this=this, expression=field) 3121 this = self._parse_bracket(this) 3122 return this 3123 3124 def _parse_primary(self) -> t.Optional[exp.Expression]: 3125 if self._match_set(self.PRIMARY_PARSERS): 3126 token_type = self._prev.token_type 3127 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3128 3129 if token_type == TokenType.STRING: 3130 expressions = [primary] 3131 while self._match(TokenType.STRING): 3132 expressions.append(exp.Literal.string(self._prev.text)) 3133 3134 if len(expressions) > 1: 3135 return self.expression(exp.Concat, expressions=expressions) 3136 3137 return primary 3138 3139 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3140 return 
exp.Literal.number(f"0.{self._prev.text}") 3141 3142 if self._match(TokenType.L_PAREN): 3143 comments = self._prev_comments 3144 query = self._parse_select() 3145 3146 if query: 3147 expressions = [query] 3148 else: 3149 expressions = self._parse_csv(self._parse_expression) 3150 3151 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3152 3153 if isinstance(this, exp.Subqueryable): 3154 this = self._parse_set_operations( 3155 self._parse_subquery(this=this, parse_alias=False) 3156 ) 3157 elif len(expressions) > 1: 3158 this = self.expression(exp.Tuple, expressions=expressions) 3159 else: 3160 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3161 3162 if this: 3163 this.add_comments(comments) 3164 3165 self._match_r_paren(expression=this) 3166 return this 3167 3168 return None 3169 3170 def _parse_field( 3171 self, 3172 any_token: bool = False, 3173 tokens: t.Optional[t.Collection[TokenType]] = None, 3174 anonymous_func: bool = False, 3175 ) -> t.Optional[exp.Expression]: 3176 return ( 3177 self._parse_primary() 3178 or self._parse_function(anonymous=anonymous_func) 3179 or self._parse_id_var(any_token=any_token, tokens=tokens) 3180 ) 3181 3182 def _parse_function( 3183 self, 3184 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3185 anonymous: bool = False, 3186 optional_parens: bool = True, 3187 ) -> t.Optional[exp.Expression]: 3188 if not self._curr: 3189 return None 3190 3191 token_type = self._curr.token_type 3192 3193 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3194 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3195 3196 if not self._next or self._next.token_type != TokenType.L_PAREN: 3197 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3198 self._advance() 3199 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3200 3201 return None 3202 3203 if token_type not in self.FUNC_TOKENS: 3204 return None 3205 3206 this = self._curr.text 3207 upper = this.upper() 3208 
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            # Dialect-specific parser for this function name (e.g. EXTRACT, CAST)
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Known function: build its typed node and validate the arg list
                this = self.validate_expression(function(args), args)
            else:
                # Unknown function name: fall back to an Anonymous node
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    # Parses one parameter of a user-defined function signature (name plus column def).
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    # Parses a possibly dotted UDF name with an optional parenthesized parameter list.
    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    # Parses an introducer (e.g. a charset prefix before a literal); falls back to
    # a plain Identifier when no literal follows.
    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    # Parses a session parameter reference, optionally dotted as kind.name.
    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            # Dotted form: the leading segment is the parameter's kind/namespace
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    # Parses a lambda expression such as `x -> x + 1` or `(a, b) -> a + b`; when no
    # lambda arrow follows, backtracks and parses a regular expression instead.
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as an ordinary expression
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            # Treat the left side of `name = value` as a variable, not a column
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    # Parses a parenthesized schema (column/constraint definitions); if the parens
    # actually wrap a SELECT, returns `this` unchanged after backtracking.
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Speculative parse: drop any errors it produced and rewind
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    # Parses a column definition: name, optional type, and any trailing constraints.
    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Bare identifier — nothing column-def-like followed it
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    # Parses AUTO_INCREMENT-style options, optionally with (start, increment) args
    # or START ... INCREMENT ... keywords.
    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    # Parses a COMPRESS column constraint, with either a wrapped list or one expression.
    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    # Parses GENERATED [BY DEFAULT | ALWAYS] AS IDENTITY (...) with its sequence options.
    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (expr): the parens hold an expression, not options
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    # Parses an INLINE [LENGTH] constraint.
    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    # Parses the constraint that follows NOT: NOT NULL or NOT CASESPECIFIC.
    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    # Parses an optionally named column constraint via CONSTRAINT_PARSERS dispatch.
    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    # Parses a table-level constraint: either unnamed, or CONSTRAINT <name> followed
    # by one or more constraint bodies.
    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    # Parses an unnamed constraint keyword and dispatches to its registered parser.
    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    # Parses UNIQUE [KEY] [(columns...)].
    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    # Collects trailing key-constraint options (ON DELETE/UPDATE actions,
    # NOT ENFORCED, DEFERRABLE, etc.) as plain strings.
    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                # Unrecognized token: stop collecting options
                break

        return options

    # Parses REFERENCES <table> [(columns...)] plus any key-constraint options.
    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    # Parses a FOREIGN KEY body: the wrapped columns, the REFERENCES clause, and
    # any ON DELETE / ON UPDATE actions.
    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token action, e.g. CASCADE / RESTRICT
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    # Parses PRIMARY KEY, as either a column constraint (no paren list) or a
    # table-level PRIMARY KEY (columns...) definition.
    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    # Parses a trailing [ ... ] or { ... } after `this`: an index/slice access, an
    # array literal, or a DuckDB-style struct literal; recurses for chained brackets.
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading colon means a slice with no start, e.g. x[:2]
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Index access: normalize the index by the dialect's INDEX_OFFSET
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    # Upgrades `this` to a Slice when a colon follows, e.g. x[1:2].
    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    # Parses CASE [operand] WHEN ... THEN ... [ELSE ...] END into an exp.Case.
    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    # Parses IF in both forms: IF(cond, true, false) and IF cond THEN ... [ELSE ...] END.
    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF statement after all — rewind past the IF token
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    # Parses EXTRACT(<unit> FROM <expr>), also accepting a comma separator.
    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    # Parses the body of CAST(expr AS type), including CHAR CHARACTER SET and the
    # temporal FORMAT clause (which becomes StrToDate / StrToTime).
    def _parse_cast(self, strict: bool) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Snowflake-style CAST(expr, 'type string')
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT):
            # CAST(... AS DATE/TIMESTAMP FORMAT '...') is a string-to-time conversion
            fmt = self._parse_string()

            return self.expression(
                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                this=this,
                format=exp.Literal.string(
                    format_time(
                        fmt.this if fmt else "",
                        self.FORMAT_MAPPING or self.TIME_MAPPING,
                        self.FORMAT_TRIE or self.TIME_TRIE,
                    )
                ),
            )

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    # Parses CONCAT's arguments, coalescing NULLs to '' for dialects where
    # CONCAT of NULL still yields a string.
    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    # Parses STRING_AGG / GROUP_CONCAT style calls, normalizing the Postgres,
    # MySQL/SQLite and WITHIN GROUP variants into exp.GroupConcat.
    def _parse_string_agg(self) -> exp.Expression:
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    # Parses CONVERT(expr USING charset) / CONVERT(expr, type) into a Cast/TryCast.
    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # First variant: DECODE(bin, charset)
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired arg is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL search value must compare with IS NULL, not equality
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR both sides being NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    # Parses a [KEY] key [:|VALUE] value pair inside JSON_OBJECT(...).
    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    # Parses JSON_OBJECT(...) with its NULL handling, UNIQUE KEYS, RETURNING,
    # FORMAT JSON and ENCODING clauses.
    def _parse_json_object(self) -> exp.JSONObject:
        star = self._parse_star()
        expressions = [star] if star else self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    # Parses LOG/LN arguments, honoring the dialect's base-position and LN defaults.
    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    # Parses MySQL-style MATCH (cols...) AGAINST ('query' [modifier]).
    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        # One column spec of the WITH (...) clause: name, type, optional path, AS JSON flag
        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    # Parses POSITION/STRPOS-style calls; `haystack_first` reflects the dialect's
    # argument order for the comma-separated form.
    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    # Parses the table list of a join hint, e.g. BROADCAST(a, b).
    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            # LEADING | TRAILING | BOTH
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(<chars> FROM <target>): first operand was the char set, second is the target
            this = self._parse_bitwise()
        else:
            # Single-operand form: TRIM(<target>)
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a trailing WINDOW clause (comma-separated named windows), if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse a single named window definition: <name> AS (<window spec>)."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the corresponding keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function syntax following `this` (FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS, OVER (...) or a named-window reference).

        When `alias` is True, parse a named-window definition (`name AS (...)`)
        instead of an OVER clause. Returns `this` unchanged if no window syntax follows.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-style token follows, so there is no window here.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — reference to a previously named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame specification: [ROWS | RANGE] BETWEEN <spec> AND <spec>
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound (e.g. UNBOUNDED PRECEDING, CURRENT ROW, <expr> FOLLOWING)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            # PRECEDING / FOLLOWING side, if present.
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias after `this`; with explicit=True, require the AS keyword."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias form: <expr> AS (a, b, ...)
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an Identifier node.

        Falls back to accepting any non-reserved token (when any_token=True) or
        any token in `tokens` / ID_VAR_TOKENS, so keywords can serve as names.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, or fall through to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and treat it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, or fall through to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse an explicitly quoted identifier token, or fall through to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any/extra tokens when requested) into a Var node."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a Var or a string literal, whichever matches first."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a star (*) projection, including EXCEPT/REPLACE modifiers via its parser."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces (e.g. @{name})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token; rewinds the cursor if its sub-parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Sub-parser produced nothing — undo the token we consumed.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a star EXCEPT column list: EXCEPT (a, b) or EXCEPT a, b."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a star REPLACE expression list: REPLACE (expr AS col, ...)."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments that preceded the separator to the last parsed item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of binary operators from the `expressions` map."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list; parens are optional if `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; raise if they are missing and required."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse either a SELECT statement or a scalar expression (aliasable if `alias`)."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT used inside DDL (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] with optional transaction modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            # Each mode can be a run of VAR tokens (e.g. "READ ONLY"), comma separated.
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK with optional savepoint and AND [NO] CHAIN modifiers."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK/END token was consumed by the statement dispatcher.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ADD [COLUMN] [IF NOT EXISTS] <column def> action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse an ALTER TABLE DROP [COLUMN] action, defaulting the drop kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse an ALTER TABLE DROP PARTITION action (one or more partition specs)."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse an ALTER TABLE ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action.

        The constraint-introducing token was already consumed; its type is in _prev.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the actions after ALTER TABLE ... ADD (constraints or columns)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint — rewind and re-parse as column additions.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> (DROP/SET DEFAULT or type change)."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # [SET DATA] TYPE <dtype> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the actions after ALTER TABLE ... DROP (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop — rewind and re-parse as column drops.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER statement; unsupported forms fall back to a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only produce a structured AlterTable if all tokens were consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO <target> USING <source> ON <cond> with WHEN ... THEN clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # False => BY TARGET, True => BY SOURCE, None/False when neither is present.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT *
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    # INSERT (cols) VALUES (exprs)
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE *
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    # UPDATE SET a = b, ...
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the dialect's SHOW_PARSERS trie, with a generic fallback."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item of the form <name> = <value> or <name> TO <value>."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment — rewind so the caller can try something else.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # "global" is a Python keyword, hence the dict expansion.
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item, preferring dialect-specific parsers from SET_PARSERS."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; leftover tokens demote it to a raw Command."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of the given (possibly multi-word) options and return it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL from `start` in a Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        # First token becomes the command name; the rest is kept verbatim.
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property: (<kind>[(<key> <value>, ...)]) — e.g. ClickHouse LAYOUT/SOURCE."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range property: (MIN <x> MAX <y>) or (MAX <y>); MIN defaults to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Look up a multi-word keyword parser by walking `trie`; rewinds on failure."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Consume the current token if it has `token_type`; returns True or None."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Consume the current token if its type is in `types`; returns True or None."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Consume the next two tokens if they match the given pair; returns True or None."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require and consume a '(' token, or raise a parse error."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require and consume a ')' token, or raise a parse error."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Consume the current token if its uppercased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of token texts (case-insensitive); rewinds fully on any miss."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns in `node` whose root name is a lambda variable into plain
        identifiers/dots, so lambda parameters aren't treated as table columns."""
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        # Replace the outermost Dot chain that contains this column.
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from an interleaved key/value argument list.

    A single star argument yields a StarMap; otherwise the arguments are
    consumed as alternating key/value pairs and packed into two Arrays.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Pair up alternating arguments: (key, value), (key, value), ...
    pairs = [(args[index], args[index + 1]) for index in range(0, len(args), 2)]

    return exp.VarMap(
        keys=exp.Array(expressions=[key for key, _ in pairs]),
        values=exp.Array(expressions=[value for _, value in pairs]),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 
TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.ENUM, 190 *NESTED_TYPE_TOKENS, 191 } 192 193 SUBQUERY_PREDICATES = { 194 TokenType.ANY: exp.Any, 195 TokenType.ALL: exp.All, 196 TokenType.EXISTS: exp.Exists, 197 TokenType.SOME: exp.Any, 198 } 199 200 RESERVED_KEYWORDS = { 201 *Tokenizer.SINGLE_TOKENS.values(), 202 TokenType.SELECT, 203 } 204 205 DB_CREATABLES = { 206 TokenType.DATABASE, 207 TokenType.SCHEMA, 208 TokenType.TABLE, 209 TokenType.VIEW, 210 TokenType.DICTIONARY, 211 } 212 213 CREATABLES = { 214 TokenType.COLUMN, 215 
TokenType.FUNCTION, 216 TokenType.INDEX, 217 TokenType.PROCEDURE, 218 *DB_CREATABLES, 219 } 220 221 # Tokens that can represent identifiers 222 ID_VAR_TOKENS = { 223 TokenType.VAR, 224 TokenType.ANTI, 225 TokenType.APPLY, 226 TokenType.ASC, 227 TokenType.AUTO_INCREMENT, 228 TokenType.BEGIN, 229 TokenType.CACHE, 230 TokenType.CASE, 231 TokenType.COLLATE, 232 TokenType.COMMAND, 233 TokenType.COMMENT, 234 TokenType.COMMIT, 235 TokenType.CONSTRAINT, 236 TokenType.DEFAULT, 237 TokenType.DELETE, 238 TokenType.DESC, 239 TokenType.DESCRIBE, 240 TokenType.DICTIONARY, 241 TokenType.DIV, 242 TokenType.END, 243 TokenType.EXECUTE, 244 TokenType.ESCAPE, 245 TokenType.FALSE, 246 TokenType.FIRST, 247 TokenType.FILTER, 248 TokenType.FORMAT, 249 TokenType.FULL, 250 TokenType.IF, 251 TokenType.IS, 252 TokenType.ISNULL, 253 TokenType.INTERVAL, 254 TokenType.KEEP, 255 TokenType.LEFT, 256 TokenType.LOAD, 257 TokenType.MERGE, 258 TokenType.NATURAL, 259 TokenType.NEXT, 260 TokenType.OFFSET, 261 TokenType.ORDINALITY, 262 TokenType.OVERWRITE, 263 TokenType.PARTITION, 264 TokenType.PERCENT, 265 TokenType.PIVOT, 266 TokenType.PRAGMA, 267 TokenType.RANGE, 268 TokenType.REFERENCES, 269 TokenType.RIGHT, 270 TokenType.ROW, 271 TokenType.ROWS, 272 TokenType.SEMI, 273 TokenType.SET, 274 TokenType.SETTINGS, 275 TokenType.SHOW, 276 TokenType.TEMPORARY, 277 TokenType.TOP, 278 TokenType.TRUE, 279 TokenType.UNIQUE, 280 TokenType.UNPIVOT, 281 TokenType.UPDATE, 282 TokenType.VOLATILE, 283 TokenType.WINDOW, 284 *CREATABLES, 285 *SUBQUERY_PREDICATES, 286 *TYPE_TOKENS, 287 *NO_PAREN_FUNCTIONS, 288 } 289 290 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 291 292 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 293 TokenType.APPLY, 294 TokenType.ASOF, 295 TokenType.FULL, 296 TokenType.LEFT, 297 TokenType.LOCK, 298 TokenType.NATURAL, 299 TokenType.OFFSET, 300 TokenType.RIGHT, 301 TokenType.WINDOW, 302 } 303 304 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 305 306 UPDATE_ALIAS_TOKENS = 
TABLE_ALIAS_TOKENS - {TokenType.SET} 307 308 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 309 310 FUNC_TOKENS = { 311 TokenType.COMMAND, 312 TokenType.CURRENT_DATE, 313 TokenType.CURRENT_DATETIME, 314 TokenType.CURRENT_TIMESTAMP, 315 TokenType.CURRENT_TIME, 316 TokenType.CURRENT_USER, 317 TokenType.FILTER, 318 TokenType.FIRST, 319 TokenType.FORMAT, 320 TokenType.GLOB, 321 TokenType.IDENTIFIER, 322 TokenType.INDEX, 323 TokenType.ISNULL, 324 TokenType.ILIKE, 325 TokenType.LIKE, 326 TokenType.MERGE, 327 TokenType.OFFSET, 328 TokenType.PRIMARY_KEY, 329 TokenType.RANGE, 330 TokenType.REPLACE, 331 TokenType.ROW, 332 TokenType.UNNEST, 333 TokenType.VAR, 334 TokenType.LEFT, 335 TokenType.RIGHT, 336 TokenType.DATE, 337 TokenType.DATETIME, 338 TokenType.TABLE, 339 TokenType.TIMESTAMP, 340 TokenType.TIMESTAMPTZ, 341 TokenType.WINDOW, 342 *TYPE_TOKENS, 343 *SUBQUERY_PREDICATES, 344 } 345 346 CONJUNCTION = { 347 TokenType.AND: exp.And, 348 TokenType.OR: exp.Or, 349 } 350 351 EQUALITY = { 352 TokenType.EQ: exp.EQ, 353 TokenType.NEQ: exp.NEQ, 354 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 355 } 356 357 COMPARISON = { 358 TokenType.GT: exp.GT, 359 TokenType.GTE: exp.GTE, 360 TokenType.LT: exp.LT, 361 TokenType.LTE: exp.LTE, 362 } 363 364 BITWISE = { 365 TokenType.AMP: exp.BitwiseAnd, 366 TokenType.CARET: exp.BitwiseXor, 367 TokenType.PIPE: exp.BitwiseOr, 368 TokenType.DPIPE: exp.DPipe, 369 } 370 371 TERM = { 372 TokenType.DASH: exp.Sub, 373 TokenType.PLUS: exp.Add, 374 TokenType.MOD: exp.Mod, 375 TokenType.COLLATE: exp.Collate, 376 } 377 378 FACTOR = { 379 TokenType.DIV: exp.IntDiv, 380 TokenType.LR_ARROW: exp.Distance, 381 TokenType.SLASH: exp.Div, 382 TokenType.STAR: exp.Mul, 383 } 384 385 TIMESTAMPS = { 386 TokenType.TIME, 387 TokenType.TIMESTAMP, 388 TokenType.TIMESTAMPTZ, 389 TokenType.TIMESTAMPLTZ, 390 } 391 392 SET_OPERATIONS = { 393 TokenType.UNION, 394 TokenType.INTERSECT, 395 TokenType.EXCEPT, 396 } 397 398 JOIN_METHODS = { 399 TokenType.NATURAL, 400 TokenType.ASOF, 401 } 
    # Tokens that qualify the side of a JOIN (LEFT / RIGHT / FULL).
    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    # Tokens that qualify the kind of a JOIN (INNER / OUTER / CROSS / SEMI / ANTI).
    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Dialect-specific join hint keywords; empty by default, populated by subclasses.
    JOIN_HINTS: t.Set[str] = set()

    # Parsers for lambda syntaxes: `args -> body` (Lambda) and `name => value` (Kwarg).
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Operators that may follow a column expression: `::` casts and the
    # JSON/JSONB extraction arrows (`->`, `->>`, `#>`, `#>>`). The None value for
    # DOT means that token is handled specially by the column parser itself.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Maps target Expression types to parser callbacks; consumed by `parse_into`.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Dispatch table for statement-leading tokens (ALTER, CREATE, INSERT, ...).
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        # A statement that starts with FROM is treated as `SELECT * FROM ...`
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix unary operators.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal / primary-expression parsers, keyed by token type. Each callback
    # receives the matched token (or `_` when its text is irrelevant).
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter / placeholder syntaxes (`?`, `@param`, `:1`, `:name`).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range-style operators (BETWEEN, IN, LIKE, IS, ...); each callback takes
    # the already-parsed left-hand side.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property parsers, keyed by the property's upper-cased keyword text.
    # Multi-word keys (e.g. "CHARACTER SET") are matched as token sequences.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint parsers, keyed by the constraint's keyword text.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE sub-command parsers.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema without a leading CONSTRAINT name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs invoked without parenthesized argument lists.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument syntax is non-standard and needs a bespoke parser.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Parsers for each query-modifier clause; iterated in declaration order.
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET statement scope keywords and the transaction form.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # SHOW statement parsers; empty here, populated by dialect subclasses
    # (the metaclass builds SHOW_TRIE from the keys).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        # See the class docstring for the meaning of each argument.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        """Resets the parser's mutable state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this failure belongs to before moving on.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits `raw_tokens` on semicolons and applies `parse_method` to each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon shouldn't open an empty chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any leftover tokens mean the statement wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # When no explicit comments are given, attach any pending token comments.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Attaches the comments of the previously consumed token to `expression`."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL spanned by the two tokens, inclusive."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Moves the cursor forward `times` tokens, refreshing _curr/_next/_prev."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the cursor back to `index` (implemented as a negative advance)."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Fallback: wraps an unparsed statement as a generic Command node."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a `COMMENT ON <kind> <target> IS '<text>'` statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses a `TO <table>` property (e.g. ClickHouse materialized views)."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause, including per-item actions."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Entry point for a single statement: dispatch, command fallback, or expression."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        """Parses a DROP statement; falls back to a generic Command for unknown kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches `IF [NOT] EXISTS`, returning whether the full sequence was present."""
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE statement; falls back to a generic Command when unparseable."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Properties can appear at several locations; merge them all into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect the modifier keywords that may precede the property name.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only pass modifiers that were actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single DDL property; returns None when no property matches."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property assignment.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses `STORED AS ...`, including Hive's INPUTFORMAT/OUTPUTFORMAT pair."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        """Parses `[= | AS] <value>` and wraps it in the given property class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parses zero or more consecutive properties into a single Properties node."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parses a Teradata `[NO] FALLBACK [PROTECTION]` property."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguates VOLATILE: a table property after CREATE/REPLACE/UNIQUE,
        otherwise a function stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        """Parses the various `WITH ...` property forms (parenthesized list,
        JOURNAL, [NO] DATA, isolated loading)."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parses `WITH JOURNAL [TABLE] [=] <table>`."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parses a `[NO] LOG` property."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Wraps pre-matched journal modifiers into a JournalProperty."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses `CHECKSUM [=] ON|OFF [DEFAULT]`; `on` stays None when neither matched."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
        """Parses a comma-separated `CLUSTER BY` expression list."""
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parses `COPY GRANTS`; retreats if GRANTS doesn't follow, so COPY
        can be re-interpreted by another parser."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parses `FREESPACE [=] <number> [PERCENT]`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parses `MERGEBLOCKRATIO [= <number> [PERCENT]]`; without `=`, only the
        pre-matched NO/DEFAULT modifiers are recorded."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE [= <size>] [BYTES|KBYTES|KILOBYTES] (Teradata)."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = {ALWAYS|MANUAL|NEVER|DEFAULT} [AUTOTEMP(...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR {ALL|INSERT|NONE}]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a Teradata LOCKING clause: target kind, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a name; ROW locking does not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse PARTITION BY <expr, ...>; empty list if absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY [=] (<schema>) or a bracketed field."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """Parse the NO-prefixed property NO PRIMARY INDEX."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse ON COMMIT {PRESERVE|DELETE} ROWS."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY (<identifier>) (Redshift)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [{INCLUDING|EXCLUDING} <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY (<id, ...>) (Redshift)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse [DEFAULT] CHARACTER SET [=] <value>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a function RETURNS clause: scalar type, TABLE<...>, or TABLE (...)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # BigQuery-style RETURNS TABLE<col type, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<creatable kind>] <table>."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (INTO table or Hive DIRECTORY target)."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. sqlite INSERT OR {REPLACE|IGNORE|...}
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (Postgres/sqlite) or ON DUPLICATE KEY (MySQL)."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <column, ...> if present."""
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a ROW FORMAT clause after ROW was already consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive ROW FORMAT {SERDE '<class>' | DELIMITED [options...]}.

        With match_row=True the leading ROW FORMAT pair must be present.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive LOAD DATA; anything else falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse DELETE [FROM] <table> [USING ...] [WHERE] [RETURNING] [LIMIT]."""
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse UPDATE <table> SET ... [FROM] [WHERE] [RETURNING] [LIMIT]."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table> (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>] (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<expr, ...>) if present."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row: a parenthesized tuple or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH-prefixed statement, SELECT, a
        parenthesized subquery/table (when nested/table is set), or VALUES.

        Returns None if none of these match; the result is always threaded
        through _parse_set_operations so UNION/INTERSECT/EXCEPT attach at the
        right level.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                # raise_error may not raise (lower error levels), so fall back
                # to returning the CTE itself to keep parsing recoverable.
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # BigQuery SELECT AS {STRUCT | VALUE}
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                # DuckDB-style FROM-first syntax: (FROM t) == SELECT * FROM t
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse WITH [RECURSIVE] cte [, cte ...]; tolerates a repeated WITH."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(col, ...)]; returns None if neither part matched."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If nothing parsed inside the parens, it wasn't a column list:
            # rewind so the L_PAREN can be consumed by the caller.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap *this* in a Subquery, attaching pivots and (optionally) an alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, where, group, limit, ...).

        Modifiers only apply to MODIFIABLES nodes; each registered parser is
        tried in order and its result set on *this* under the matching key.
        """
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        # LIMIT x OFFSET y parses as one node; re-home the
                        # offset as a sibling modifier on the query itself.
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this
    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment /*+ ... */ as a csv of functions."""
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, modifiers: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; with modifiers=True also absorb query modifiers."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE clause (row pattern matching)."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
        )

        # ROWS PER MATCH variants are preserved verbatim as a var node.
        # NOTE(review): the f-prefixes below are extraneous (no placeholders).
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is a regex-like mini-language; instead of parsing it,
            # scan to the balancing R_PAREN and keep the raw SQL text.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY constructs."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # No subquery: a (possibly dotted) function call or identifier,
            # e.g. LATERAL VIEW explode(...) or LATERAL schema.fn(...)
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join prefix, if any."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        """Parse a JOIN (including comma joins and CROSS/OUTER APPLY)."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The prefix tokens weren't actually a join — rewind and reset.
            self._retreat(index)
            kind = None
            method = None
            side = None

        # advance=False (peek only): the APPLY target is parsed as a table below.
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        When *index* is given (name already parsed, e.g. CREATE INDEX), parse
        the ON <table> part; otherwise parse a trailing [UNIQUE|PRIMARY|AMP]
        INDEX <name> clause.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function, id, string, or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse [catalog.][db.]table, nesting extra components into Dot nodes."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift parts left: previous table becomes db, db becomes catalog.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or plain table.

        The alternatives are tried in order; the first match wins. For plain
        tables, alias/pivots/hints/tablesample are attached, with the
        sample-vs-alias order controlled by ALIAS_POST_TABLESAMPLE.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node and becomes the new root.
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] name]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            # Dialects like BigQuery treat the unnest alias as the column name.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse VALUES ... or a parenthesized (VALUES ...) derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse TABLESAMPLE (...) or, with as_modifier=True, USING SAMPLE (DuckDB)."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            # NOTE(review): duplicated assignment — harmless but likely a typo.
            bucket_denominator = bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
exp.TableSample, 2475 method=method, 2476 bucket_numerator=bucket_numerator, 2477 bucket_denominator=bucket_denominator, 2478 bucket_field=bucket_field, 2479 percent=percent, 2480 rows=rows, 2481 size=size, 2482 seed=seed, 2483 kind=kind, 2484 ) 2485 2486 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2487 return list(iter(self._parse_pivot, None)) 2488 2489 # https://duckdb.org/docs/sql/statements/pivot 2490 def _parse_simplified_pivot(self) -> exp.Pivot: 2491 def _parse_on() -> t.Optional[exp.Expression]: 2492 this = self._parse_bitwise() 2493 return self._parse_in(this) if self._match(TokenType.IN) else this 2494 2495 this = self._parse_table() 2496 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2497 using = self._match(TokenType.USING) and self._parse_csv( 2498 lambda: self._parse_alias(self._parse_function()) 2499 ) 2500 group = self._parse_group() 2501 return self.expression( 2502 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2503 ) 2504 2505 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2506 index = self._index 2507 2508 if self._match(TokenType.PIVOT): 2509 unpivot = False 2510 elif self._match(TokenType.UNPIVOT): 2511 unpivot = True 2512 else: 2513 return None 2514 2515 expressions = [] 2516 field = None 2517 2518 if not self._match(TokenType.L_PAREN): 2519 self._retreat(index) 2520 return None 2521 2522 if unpivot: 2523 expressions = self._parse_csv(self._parse_column) 2524 else: 2525 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2526 2527 if not expressions: 2528 self.raise_error("Failed to parse PIVOT's aggregation list") 2529 2530 if not self._match(TokenType.FOR): 2531 self.raise_error("Expecting FOR") 2532 2533 value = self._parse_column() 2534 2535 if not self._match(TokenType.IN): 2536 self.raise_error("Expecting IN") 2537 2538 field = self._parse_in(value, alias=True) 2539 2540 self._match_r_paren() 2541 2542 pivot = self.expression(exp.Pivot, 
expressions=expressions, field=field, unpivot=unpivot) 2543 2544 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2545 pivot.set("alias", self._parse_table_alias()) 2546 2547 if not unpivot: 2548 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2549 2550 columns: t.List[exp.Expression] = [] 2551 for fld in pivot.args["field"].expressions: 2552 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2553 for name in names: 2554 if self.PREFIXED_PIVOT_COLUMNS: 2555 name = f"{name}_{field_name}" if name else field_name 2556 else: 2557 name = f"{field_name}_{name}" if name else field_name 2558 2559 columns.append(exp.to_identifier(name)) 2560 2561 pivot.set("columns", columns) 2562 2563 return pivot 2564 2565 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2566 return [agg.alias for agg in aggregations] 2567 2568 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2569 if not skip_where_token and not self._match(TokenType.WHERE): 2570 return None 2571 2572 return self.expression( 2573 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2574 ) 2575 2576 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2577 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2578 return None 2579 2580 elements = defaultdict(list) 2581 2582 while True: 2583 expressions = self._parse_csv(self._parse_conjunction) 2584 if expressions: 2585 elements["expressions"].extend(expressions) 2586 2587 grouping_sets = self._parse_grouping_sets() 2588 if grouping_sets: 2589 elements["grouping_sets"].extend(grouping_sets) 2590 2591 rollup = None 2592 cube = None 2593 totals = None 2594 2595 with_ = self._match(TokenType.WITH) 2596 if self._match(TokenType.ROLLUP): 2597 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2598 elements["rollup"].extend(ensure_list(rollup)) 2599 2600 if 
self._match(TokenType.CUBE): 2601 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2602 elements["cube"].extend(ensure_list(cube)) 2603 2604 if self._match_text_seq("TOTALS"): 2605 totals = True 2606 elements["totals"] = True # type: ignore 2607 2608 if not (grouping_sets or rollup or cube or totals): 2609 break 2610 2611 return self.expression(exp.Group, **elements) # type: ignore 2612 2613 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2614 if not self._match(TokenType.GROUPING_SETS): 2615 return None 2616 2617 return self._parse_wrapped_csv(self._parse_grouping_set) 2618 2619 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2620 if self._match(TokenType.L_PAREN): 2621 grouping_set = self._parse_csv(self._parse_column) 2622 self._match_r_paren() 2623 return self.expression(exp.Tuple, expressions=grouping_set) 2624 2625 return self._parse_column() 2626 2627 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2628 if not skip_having_token and not self._match(TokenType.HAVING): 2629 return None 2630 return self.expression(exp.Having, this=self._parse_conjunction()) 2631 2632 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2633 if not self._match(TokenType.QUALIFY): 2634 return None 2635 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2636 2637 def _parse_order( 2638 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2639 ) -> t.Optional[exp.Expression]: 2640 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2641 return this 2642 2643 return self.expression( 2644 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2645 ) 2646 2647 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2648 if not self._match(token): 2649 return None 2650 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2651 2652 def _parse_ordered(self) -> exp.Ordered: 
2653 this = self._parse_conjunction() 2654 self._match(TokenType.ASC) 2655 2656 is_desc = self._match(TokenType.DESC) 2657 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2658 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2659 desc = is_desc or False 2660 asc = not desc 2661 nulls_first = is_nulls_first or False 2662 explicitly_null_ordered = is_nulls_first or is_nulls_last 2663 2664 if ( 2665 not explicitly_null_ordered 2666 and ( 2667 (asc and self.NULL_ORDERING == "nulls_are_small") 2668 or (desc and self.NULL_ORDERING != "nulls_are_small") 2669 ) 2670 and self.NULL_ORDERING != "nulls_are_last" 2671 ): 2672 nulls_first = True 2673 2674 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2675 2676 def _parse_limit( 2677 self, this: t.Optional[exp.Expression] = None, top: bool = False 2678 ) -> t.Optional[exp.Expression]: 2679 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2680 limit_paren = self._match(TokenType.L_PAREN) 2681 expression = self._parse_number() if top else self._parse_term() 2682 2683 if self._match(TokenType.COMMA): 2684 offset = expression 2685 expression = self._parse_term() 2686 else: 2687 offset = None 2688 2689 limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset) 2690 2691 if limit_paren: 2692 self._match_r_paren() 2693 2694 return limit_exp 2695 2696 if self._match(TokenType.FETCH): 2697 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2698 direction = self._prev.text if direction else "FIRST" 2699 2700 count = self._parse_number() 2701 percent = self._match(TokenType.PERCENT) 2702 2703 self._match_set((TokenType.ROW, TokenType.ROWS)) 2704 2705 only = self._match_text_seq("ONLY") 2706 with_ties = self._match_text_seq("WITH", "TIES") 2707 2708 if only and with_ties: 2709 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2710 2711 return self.expression( 2712 exp.Fetch, 2713 direction=direction, 2714 count=count, 
2715 percent=percent, 2716 with_ties=with_ties, 2717 ) 2718 2719 return this 2720 2721 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2722 if not self._match(TokenType.OFFSET): 2723 return this 2724 2725 count = self._parse_number() 2726 self._match_set((TokenType.ROW, TokenType.ROWS)) 2727 return self.expression(exp.Offset, this=this, expression=count) 2728 2729 def _parse_locks(self) -> t.List[exp.Lock]: 2730 locks = [] 2731 while True: 2732 if self._match_text_seq("FOR", "UPDATE"): 2733 update = True 2734 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2735 "LOCK", "IN", "SHARE", "MODE" 2736 ): 2737 update = False 2738 else: 2739 break 2740 2741 expressions = None 2742 if self._match_text_seq("OF"): 2743 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2744 2745 wait: t.Optional[bool | exp.Expression] = None 2746 if self._match_text_seq("NOWAIT"): 2747 wait = True 2748 elif self._match_text_seq("WAIT"): 2749 wait = self._parse_primary() 2750 elif self._match_text_seq("SKIP", "LOCKED"): 2751 wait = False 2752 2753 locks.append( 2754 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2755 ) 2756 2757 return locks 2758 2759 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2760 if not self._match_set(self.SET_OPERATIONS): 2761 return this 2762 2763 token_type = self._prev.token_type 2764 2765 if token_type == TokenType.UNION: 2766 expression = exp.Union 2767 elif token_type == TokenType.EXCEPT: 2768 expression = exp.Except 2769 else: 2770 expression = exp.Intersect 2771 2772 return self.expression( 2773 expression, 2774 this=this, 2775 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2776 expression=self._parse_set_operations(self._parse_select(nested=True)), 2777 ) 2778 2779 def _parse_expression(self) -> t.Optional[exp.Expression]: 2780 return 
self._parse_alias(self._parse_conjunction()) 2781 2782 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2783 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2784 2785 def _parse_equality(self) -> t.Optional[exp.Expression]: 2786 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2787 2788 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2789 return self._parse_tokens(self._parse_range, self.COMPARISON) 2790 2791 def _parse_range(self) -> t.Optional[exp.Expression]: 2792 this = self._parse_bitwise() 2793 negate = self._match(TokenType.NOT) 2794 2795 if self._match_set(self.RANGE_PARSERS): 2796 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2797 if not expression: 2798 return this 2799 2800 this = expression 2801 elif self._match(TokenType.ISNULL): 2802 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2803 2804 # Postgres supports ISNULL and NOTNULL for conditions. 2805 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2806 if self._match(TokenType.NOTNULL): 2807 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2808 this = self.expression(exp.Not, this=this) 2809 2810 if negate: 2811 this = self.expression(exp.Not, this=this) 2812 2813 if self._match(TokenType.IS): 2814 this = self._parse_is(this) 2815 2816 return this 2817 2818 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2819 index = self._index - 1 2820 negate = self._match(TokenType.NOT) 2821 2822 if self._match_text_seq("DISTINCT", "FROM"): 2823 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2824 return self.expression(klass, this=this, expression=self._parse_expression()) 2825 2826 expression = self._parse_null() or self._parse_boolean() 2827 if not expression: 2828 self._retreat(index) 2829 return None 2830 2831 this = self.expression(exp.Is, this=this, expression=expression) 2832 return self.expression(exp.Not, this=this) if negate else this 2833 
    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of an IN predicate.

        Handles three forms: IN UNNEST(...), IN (list-or-subquery), and the
        unparenthesized IN field form.
        """
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery gets stored under "query"; otherwise it's a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse `low AND high` after a BETWEEN token."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE 'char' suffix (used with LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, normalizing to INTERVAL '<n>' <unit> form."""
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # Split "'5 day'" into value '5' and unit day.
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as two tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (TERM token set)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (FACTOR token set)."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, then a typed/AT TIME ZONE expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL, a `TYPE literal` cast shorthand, or fall back to a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # Dialect-specific literal parsers (e.g. DATE '...') take precedence.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with no literal following: treat it as a column instead.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this
    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse a type size argument, e.g. the `10` or `10 CHAR` in VARCHAR(10 CHAR)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY/MAP/STRUCT), sized, and
        bracketed array forms.

        Args:
            check_func: when True, a parenthesized type followed by a string is
                assumed to be a function call rather than a type, and parsing
                backtracks.
            schema: whether we're parsing in a schema position (propagated to
                nested types).
        """
        index = self._index

        # Teradata system UDT prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a valid parenthesized type; rewind completely.
                self._retreat(index)
                return None

            # Could still turn out to be a function call (see check_func below).
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array suffix, e.g. INT[][].
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone "[" means this wasn't a type (e.g. it's an index expression).
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket nested types, e.g. ARRAY<INT>, STRUCT<a INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # If a string follows, this was a function call like CHAR('x'), not a type.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: `name type` or `name: type`."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, wrapping a bare identifier in exp.Column."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing column operators: ::cast, dots, brackets, and dialect extras."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast operator: expr::type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift column parts right: the previous parts become table/db/catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, adjacent-string concat, `.N`
        number, or a parenthesized expression/tuple/subquery.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> CONCAT('a', 'b')).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal, e.g. ".5" -> 0.5.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a "field": a primary (literal), a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, including no-paren functions (e.g. CURRENT_DATE),
        dialect-specific function parsers, subquery predicates (EXISTS/ANY/...),
        and anonymous (unknown) functions. Returns None if no function starts here.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # Without a following "(" only keyword-style functions are possible.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the "("

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Known function: build the typed node and validate its arg list.
                this = self.validate_expression(function(args), args)
            else:
                # Unknown function: preserve it as an Anonymous call.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly-qualified UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. x -> x + 1) or, failing that, a function argument:
        DISTINCT ..., a select/expression, with optional ORDER BY / LIMIT /
        IGNORE|RESPECT NULLS trailers.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all; rewind and parse a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                # In named-argument position, the LHS is a parameter name, not a column.
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to `this`.

        First speculatively tries a nested SELECT (e.g. CREATE TABLE t AS (SELECT ...));
        any errors from that attempt are discarded and the position restored.
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type and constraints of a column definition; returns `this`
        unchanged when neither is present.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... parameters (which yield an identity constraint).
        """
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [( sequence options )],
        or GENERATED ... AS (expression) when IDENTITY is absent.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expr): the parenthesized part is an expression.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] n column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the constraint following NOT: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint; returns just the name
        expression if CONSTRAINT <name> is not followed by a known constraint kind.
        """
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint: either an unnamed schema constraint or
        CONSTRAINT <name> followed by one or more constraint bodies.
        """
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint body (PRIMARY KEY, FOREIGN KEY, CHECK, ...) by keyword."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)
    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] with an optional wrapped column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, DEFERRABLE,
        NOT ENFORCED, ...) as plain strings, in source order.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE / UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse REFERENCES <table> [(columns)] [options]; if `match` is True the
        REFERENCES keyword must be present, otherwise it is assumed consumed.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE|UPDATE <action> ...]."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as either a column constraint (no column list) or a
        table constraint with a wrapped column list and options.
        """
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] / {...} after `this`: subscripts, array literals, or struct
        literals; recurses so chained brackets (a[0][1]) are consumed.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize index literals to the canonical (0-based) offset.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the optional `: upper` part of a slice subscript."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in both forms: IF(cond, true, false) and IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); a comma is accepted in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]); `strict` selects
        Cast vs TryCast. CAST(expr, 'type') becomes CastToStrType.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT):
            # CAST(x AS DATE FORMAT 'fmt') is a string-to-temporal conversion.
            fmt = self._parse_string()

            return self.expression(
                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                this=this,
                format=exp.Literal.string(
                    format_time(
                        fmt.this if fmt else "",
                        self.FORMAT_MAPPING or self.TIME_MAPPING,
                        self.FORMAT_TRIE or self.TIME_TRIE,
                    )
                ),
            )

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT arguments, applying dialect NULL/strictness semantics."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Treat NULL arguments as empty strings by coalescing each one.
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style calls, including the Postgres
        trailing ORDER BY and the WITHIN GROUP (ORDER BY ...) variant.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type); `strict`
        selects Cast vs TryCast.
        """
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: match either by equality or when both
                # sides are NULL (DECODE treats NULL = NULL as a match).
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of trailing args means the last one is the default branch.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one [KEY] k [:|VALUE] v pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) including NULL handling, UNIQUE KEYS, RETURNING,
        FORMAT JSON and ENCODING clauses.
        """
        star = self._parse_star()
        expressions = [star] if star else self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG arguments, honoring dialect base-argument order and the
        LOG-defaults-to-LN convention for single-argument calls.
        """
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH (cols) AGAINST (expr [modifier]) (MySQL full-text search)."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse OPENJSON(expr [, path]) [WITH (column defs)] (T-SQL)."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause column: name, type, optional path, optional AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE arguments; supports both the `substr IN str` form
        and the comma form, with dialect-dependent argument order.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint function (e.g. BROADCAST(t1, t2))."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        """Parse SUBSTRING arguments, including the FROM/FOR keyword form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        """Parse TRIM([LEADING|TRAILING|BOTH] [chars FROM] str [COLLATE ...])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # Two-argument form: what was parsed first is the chars to trim.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause: a comma-separated list of named windows."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the corresponding keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function suffix of `this`: FILTER (...), WITHIN GROUP,
        IGNORE/RESPECT NULLS and the OVER clause (partition, order, frame). With
        `alias=True`, parses a named-window definition (name AS (spec)) instead.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name>: reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame endpoint: UNBOUNDED, CURRENT ROW, or an offset,
        with an optional PRECEDING/FOLLOWING side.
        """
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias or an aliases list `(a, b, c)` for `this`; with
        `explicit=True`, the AS keyword is required.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like token (keywords usable as names)."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, or a placeholder standing in for one."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and return it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a number literal, or a placeholder standing in for one."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, or a placeholder standing in for one."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any non-reserved token / one of `tokens`) as a Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a Var or a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE or FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a * token."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a session/query parameter, optionally wrapped in braces (@{x})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder (e.g. ? or :name); rewinds if the parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a SELECT * EXCEPT [(...)] column list."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a SELECT * REPLACE [(...)] expression list."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`; None results are dropped."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments attached to the separator belong to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a run of binary operators from `expressions` over `parse_method`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; parentheses are required unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    # NOTE(review): this definition continues beyond the visible chunk.
    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return
self._parse_select() or self._parse_set_operations( 4245 self._parse_expression() if alias else self._parse_conjunction() 4246 ) 4247 4248 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4249 return self._parse_query_modifiers( 4250 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4251 ) 4252 4253 def _parse_transaction(self) -> exp.Transaction: 4254 this = None 4255 if self._match_texts(self.TRANSACTION_KIND): 4256 this = self._prev.text 4257 4258 self._match_texts({"TRANSACTION", "WORK"}) 4259 4260 modes = [] 4261 while True: 4262 mode = [] 4263 while self._match(TokenType.VAR): 4264 mode.append(self._prev.text) 4265 4266 if mode: 4267 modes.append(" ".join(mode)) 4268 if not self._match(TokenType.COMMA): 4269 break 4270 4271 return self.expression(exp.Transaction, this=this, modes=modes) 4272 4273 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4274 chain = None 4275 savepoint = None 4276 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4277 4278 self._match_texts({"TRANSACTION", "WORK"}) 4279 4280 if self._match_text_seq("TO"): 4281 self._match_text_seq("SAVEPOINT") 4282 savepoint = self._parse_id_var() 4283 4284 if self._match(TokenType.AND): 4285 chain = not self._match_text_seq("NO") 4286 self._match_text_seq("CHAIN") 4287 4288 if is_rollback: 4289 return self.expression(exp.Rollback, savepoint=savepoint) 4290 4291 return self.expression(exp.Commit, chain=chain) 4292 4293 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4294 if not self._match_text_seq("ADD"): 4295 return None 4296 4297 self._match(TokenType.COLUMN) 4298 exists_column = self._parse_exists(not_=True) 4299 expression = self._parse_column_def(self._parse_field(any_token=True)) 4300 4301 if expression: 4302 expression.set("exists", exists_column) 4303 4304 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4305 if self._match_texts(("FIRST", "AFTER")): 4306 position 
= self._prev.text 4307 column_position = self.expression( 4308 exp.ColumnPosition, this=self._parse_column(), position=position 4309 ) 4310 expression.set("position", column_position) 4311 4312 return expression 4313 4314 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4315 drop = self._match(TokenType.DROP) and self._parse_drop() 4316 if drop and not isinstance(drop, exp.Command): 4317 drop.set("kind", drop.args.get("kind", "COLUMN")) 4318 return drop 4319 4320 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4321 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4322 return self.expression( 4323 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4324 ) 4325 4326 def _parse_add_constraint(self) -> exp.AddConstraint: 4327 this = None 4328 kind = self._prev.token_type 4329 4330 if kind == TokenType.CONSTRAINT: 4331 this = self._parse_id_var() 4332 4333 if self._match_text_seq("CHECK"): 4334 expression = self._parse_wrapped(self._parse_conjunction) 4335 enforced = self._match_text_seq("ENFORCED") 4336 4337 return self.expression( 4338 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4339 ) 4340 4341 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4342 expression = self._parse_foreign_key() 4343 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4344 expression = self._parse_primary_key() 4345 else: 4346 expression = None 4347 4348 return self.expression(exp.AddConstraint, this=this, expression=expression) 4349 4350 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4351 index = self._index - 1 4352 4353 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4354 return self._parse_csv(self._parse_add_constraint) 4355 4356 self._retreat(index) 4357 return self._parse_csv(self._parse_add_column) 4358 4359 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4360 
self._match(TokenType.COLUMN) 4361 column = self._parse_field(any_token=True) 4362 4363 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4364 return self.expression(exp.AlterColumn, this=column, drop=True) 4365 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4366 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4367 4368 self._match_text_seq("SET", "DATA") 4369 return self.expression( 4370 exp.AlterColumn, 4371 this=column, 4372 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4373 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4374 using=self._match(TokenType.USING) and self._parse_conjunction(), 4375 ) 4376 4377 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4378 index = self._index - 1 4379 4380 partition_exists = self._parse_exists() 4381 if self._match(TokenType.PARTITION, advance=False): 4382 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4383 4384 self._retreat(index) 4385 return self._parse_csv(self._parse_drop_column) 4386 4387 def _parse_alter_table_rename(self) -> exp.RenameTable: 4388 self._match_text_seq("TO") 4389 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4390 4391 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4392 start = self._prev 4393 4394 if not self._match(TokenType.TABLE): 4395 return self._parse_as_command(start) 4396 4397 exists = self._parse_exists() 4398 this = self._parse_table(schema=True) 4399 4400 if self._next: 4401 self._advance() 4402 4403 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4404 if parser: 4405 actions = ensure_list(parser(self)) 4406 4407 if not self._curr: 4408 return self.expression( 4409 exp.AlterTable, 4410 this=this, 4411 exists=exists, 4412 actions=actions, 4413 ) 4414 return self._parse_as_command(start) 4415 4416 def _parse_merge(self) -> exp.Merge: 4417 self._match(TokenType.INTO) 4418 target = 
self._parse_table() 4419 4420 self._match(TokenType.USING) 4421 using = self._parse_table() 4422 4423 self._match(TokenType.ON) 4424 on = self._parse_conjunction() 4425 4426 whens = [] 4427 while self._match(TokenType.WHEN): 4428 matched = not self._match(TokenType.NOT) 4429 self._match_text_seq("MATCHED") 4430 source = ( 4431 False 4432 if self._match_text_seq("BY", "TARGET") 4433 else self._match_text_seq("BY", "SOURCE") 4434 ) 4435 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4436 4437 self._match(TokenType.THEN) 4438 4439 if self._match(TokenType.INSERT): 4440 _this = self._parse_star() 4441 if _this: 4442 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4443 else: 4444 then = self.expression( 4445 exp.Insert, 4446 this=self._parse_value(), 4447 expression=self._match(TokenType.VALUES) and self._parse_value(), 4448 ) 4449 elif self._match(TokenType.UPDATE): 4450 expressions = self._parse_star() 4451 if expressions: 4452 then = self.expression(exp.Update, expressions=expressions) 4453 else: 4454 then = self.expression( 4455 exp.Update, 4456 expressions=self._match(TokenType.SET) 4457 and self._parse_csv(self._parse_equality), 4458 ) 4459 elif self._match(TokenType.DELETE): 4460 then = self.expression(exp.Var, this=self._prev.text) 4461 else: 4462 then = None 4463 4464 whens.append( 4465 self.expression( 4466 exp.When, 4467 matched=matched, 4468 source=source, 4469 condition=condition, 4470 then=then, 4471 ) 4472 ) 4473 4474 return self.expression( 4475 exp.Merge, 4476 this=target, 4477 using=using, 4478 on=on, 4479 expressions=whens, 4480 ) 4481 4482 def _parse_show(self) -> t.Optional[exp.Expression]: 4483 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4484 if parser: 4485 return parser(self) 4486 self._advance() 4487 return self.expression(exp.Show, this=self._prev.text.upper()) 4488 4489 def _parse_set_item_assignment( 4490 self, kind: t.Optional[str] = None 4491 ) -> 
t.Optional[exp.Expression]: 4492 index = self._index 4493 4494 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4495 return self._parse_set_transaction(global_=kind == "GLOBAL") 4496 4497 left = self._parse_primary() or self._parse_id_var() 4498 4499 if not self._match_texts(("=", "TO")): 4500 self._retreat(index) 4501 return None 4502 4503 right = self._parse_statement() or self._parse_id_var() 4504 this = self.expression(exp.EQ, this=left, expression=right) 4505 4506 return self.expression(exp.SetItem, this=this, kind=kind) 4507 4508 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4509 self._match_text_seq("TRANSACTION") 4510 characteristics = self._parse_csv( 4511 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4512 ) 4513 return self.expression( 4514 exp.SetItem, 4515 expressions=characteristics, 4516 kind="TRANSACTION", 4517 **{"global": global_}, # type: ignore 4518 ) 4519 4520 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4521 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4522 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4523 4524 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4525 index = self._index 4526 set_ = self.expression( 4527 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4528 ) 4529 4530 if self._curr: 4531 self._retreat(index) 4532 return self._parse_as_command(self._prev) 4533 4534 return set_ 4535 4536 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4537 for option in options: 4538 if self._match_text_seq(*option.split(" ")): 4539 return exp.var(option) 4540 return None 4541 4542 def _parse_as_command(self, start: Token) -> exp.Command: 4543 while self._curr: 4544 self._advance() 4545 text = self._find_sql(start, self._prev) 4546 size = len(start.text) 4547 return exp.Command(this=text[:size], 
expression=text[size:]) 4548 4549 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4550 settings = [] 4551 4552 self._match_l_paren() 4553 kind = self._parse_id_var() 4554 4555 if self._match(TokenType.L_PAREN): 4556 while True: 4557 key = self._parse_id_var() 4558 value = self._parse_primary() 4559 4560 if not key and value is None: 4561 break 4562 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4563 self._match(TokenType.R_PAREN) 4564 4565 self._match_r_paren() 4566 4567 return self.expression( 4568 exp.DictProperty, 4569 this=this, 4570 kind=kind.this if kind else None, 4571 settings=settings, 4572 ) 4573 4574 def _parse_dict_range(self, this: str) -> exp.DictRange: 4575 self._match_l_paren() 4576 has_min = self._match_text_seq("MIN") 4577 if has_min: 4578 min = self._parse_var() or self._parse_primary() 4579 self._match_text_seq("MAX") 4580 max = self._parse_var() or self._parse_primary() 4581 else: 4582 max = self._parse_var() or self._parse_primary() 4583 min = exp.Literal.number(0) 4584 self._match_r_paren() 4585 return self.expression(exp.DictRange, this=this, min=min, max=max) 4586 4587 def _find_parser( 4588 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4589 ) -> t.Optional[t.Callable]: 4590 if not self._curr: 4591 return None 4592 4593 index = self._index 4594 this = [] 4595 while True: 4596 # The current token might be multiple words 4597 curr = self._curr.text.upper() 4598 key = curr.split(" ") 4599 this.append(curr) 4600 4601 self._advance() 4602 result, trie = in_trie(trie, key) 4603 if result == TrieResult.FAILED: 4604 break 4605 4606 if result == TrieResult.EXISTS: 4607 subparser = parsers[" ".join(this)] 4608 return subparser 4609 4610 self._retreat(index) 4611 return None 4612 4613 def _match(self, token_type, advance=True, expression=None): 4614 if not self._curr: 4615 return None 4616 4617 if self._curr.token_type == token_type: 4618 if advance: 4619 self._advance() 4620 
self._add_comments(expression) 4621 return True 4622 4623 return None 4624 4625 def _match_set(self, types, advance=True): 4626 if not self._curr: 4627 return None 4628 4629 if self._curr.token_type in types: 4630 if advance: 4631 self._advance() 4632 return True 4633 4634 return None 4635 4636 def _match_pair(self, token_type_a, token_type_b, advance=True): 4637 if not self._curr or not self._next: 4638 return None 4639 4640 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4641 if advance: 4642 self._advance(2) 4643 return True 4644 4645 return None 4646 4647 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4648 if not self._match(TokenType.L_PAREN, expression=expression): 4649 self.raise_error("Expecting (") 4650 4651 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4652 if not self._match(TokenType.R_PAREN, expression=expression): 4653 self.raise_error("Expecting )") 4654 4655 def _match_texts(self, texts, advance=True): 4656 if self._curr and self._curr.text.upper() in texts: 4657 if advance: 4658 self._advance() 4659 return True 4660 return False 4661 4662 def _match_text_seq(self, *texts, advance=True): 4663 index = self._index 4664 for text in texts: 4665 if self._curr and self._curr.text.upper() == text: 4666 self._advance() 4667 else: 4668 self._retreat(index) 4669 return False 4670 4671 if not advance: 4672 self._retreat(index) 4673 4674 return True 4675 4676 @t.overload 4677 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4678 ... 4679 4680 @t.overload 4681 def _replace_columns_with_dots( 4682 self, this: t.Optional[exp.Expression] 4683 ) -> t.Optional[exp.Expression]: 4684 ... 
4685 4686 def _replace_columns_with_dots(self, this): 4687 if isinstance(this, exp.Dot): 4688 exp.replace_children(this, self._replace_columns_with_dots) 4689 elif isinstance(this, exp.Column): 4690 exp.replace_children(this, self._replace_columns_with_dots) 4691 table = this.args.get("table") 4692 this = ( 4693 self.expression(exp.Dot, this=table, expression=this.this) 4694 if table 4695 else self.expression(exp.Var, this=this.name) 4696 ) 4697 elif isinstance(this, exp.Identifier): 4698 this = self.expression(exp.Var, this=this.name) 4699 4700 return this 4701 4702 def _replace_lambda( 4703 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4704 ) -> t.Optional[exp.Expression]: 4705 if not node: 4706 return node 4707 4708 for column in node.find_all(exp.Column): 4709 if column.parts[0].name in lambda_variables: 4710 dot_or_id = column.to_dot() if column.table else column.this 4711 parent = column.parent 4712 4713 while isinstance(parent, exp.Dot): 4714 if not isinstance(parent.parent, exp.Dot): 4715 parent.replace(dot_or_id) 4716 break 4717 parent = parent.parent 4718 else: 4719 if column is node: 4720 node = dot_or_id 4721 else: 4722 column.replace(dot_or_id) 4723 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        # Falls back to ErrorLevel.IMMEDIATE when no level is provided.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        # Number of characters of query context shown in error messages.
        self.error_message_context = error_message_context
        # Cap on messages included in a raised ParseError (ErrorLevel.RAISE only).
        self.max_errors = max_errors
        # reset() initializes the per-parse mutable state — defined elsewhere in the class.
        self.reset()
854 def parse( 855 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 856 ) -> t.List[t.Optional[exp.Expression]]: 857 """ 858 Parses a list of tokens and returns a list of syntax trees, one tree 859 per parsed SQL statement. 860 861 Args: 862 raw_tokens: The list of tokens. 863 sql: The original SQL string, used to produce helpful debug messages. 864 865 Returns: 866 The list of the produced syntax trees. 867 """ 868 return self._parse( 869 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 870 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
872 def parse_into( 873 self, 874 expression_types: exp.IntoType, 875 raw_tokens: t.List[Token], 876 sql: t.Optional[str] = None, 877 ) -> t.List[t.Optional[exp.Expression]]: 878 """ 879 Parses a list of tokens into a given Expression type. If a collection of Expression 880 types is given instead, this method will try to parse the token list into each one 881 of them, stopping at the first for which the parsing succeeds. 882 883 Args: 884 expression_types: The expression type(s) to try and parse the token list into. 885 raw_tokens: The list of tokens. 886 sql: The original SQL string, used to produce helpful debug messages. 887 888 Returns: 889 The target Expression. 890 """ 891 errors = [] 892 for expression_type in ensure_list(expression_types): 893 parser = self.EXPRESSION_PARSERS.get(expression_type) 894 if not parser: 895 raise TypeError(f"No parser registered for {expression_type}") 896 897 try: 898 return self._parse(parser, raw_tokens, sql) 899 except ParseError as e: 900 e.errors[0]["into_expression"] = expression_type 901 errors.append(e) 902 903 raise ParseError( 904 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 905 errors=merge_errors(errors), 906 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
943 def check_errors(self) -> None: 944 """Logs or raises any found errors, depending on the chosen error level setting.""" 945 if self.error_level == ErrorLevel.WARN: 946 for error in self.errors: 947 logger.error(str(error)) 948 elif self.error_level == ErrorLevel.RAISE and self.errors: 949 raise ParseError( 950 concat_messages(self.errors, self.max_errors), 951 errors=merge_errors(self.errors), 952 )
Logs or raises any found errors, depending on the chosen error level setting.
954 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 955 """ 956 Appends an error in the list of recorded errors or raises it, depending on the chosen 957 error level setting. 958 """ 959 token = token or self._curr or self._prev or Token.string("") 960 start = token.start 961 end = token.end + 1 962 start_context = self.sql[max(start - self.error_message_context, 0) : start] 963 highlight = self.sql[start:end] 964 end_context = self.sql[end : end + self.error_message_context] 965 966 error = ParseError.new( 967 f"{message}. Line {token.line}, Col: {token.col}.\n" 968 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 969 description=message, 970 line=token.line, 971 col=token.col, 972 start_context=start_context, 973 highlight=highlight, 974 end_context=end_context, 975 ) 976 977 if self.error_level == ErrorLevel.IMMEDIATE: 978 raise error 979 980 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
982 def expression( 983 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 984 ) -> E: 985 """ 986 Creates a new, validated Expression. 987 988 Args: 989 exp_class: The expression class to instantiate. 990 comments: An optional list of comments to attach to the expression. 991 kwargs: The arguments to set for the expression along with their respective values. 992 993 Returns: 994 The target expression. 995 """ 996 instance = exp_class(**kwargs) 997 instance.add_comments(comments) if comments else self._add_comments(instance) 998 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1005 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1006 """ 1007 Validates an Expression, making sure that all its mandatory arguments are set. 1008 1009 Args: 1010 expression: The expression to validate. 1011 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1012 1013 Returns: 1014 The validated expression. 1015 """ 1016 if self.error_level != ErrorLevel.IGNORE: 1017 for error_message in expression.error_messages(args): 1018 self.raise_error(error_message) 1019 1020 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.