sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from a flat [key1, value1, key2, value2, ...] arg list.

    A single star argument (e.g. MAP(*)) produces a StarMap instead.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    # Args alternate key/value; split them into two parallel arrays.
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a LIKE expression; wraps it in an Escape node when a third arg is present.

    NOTE(review): args[1] becomes `this` and args[0] the pattern — the function-call
    argument order appears deliberately reversed relative to the infix operator.
    """
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callback producing `expr_type(this, <bitwise>)`, also
    consuming a trailing ESCAPE clause if one follows."""
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    """Metaclass that precomputes the SHOW/SET keyword tries for every Parser subclass."""

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        # Multi-word keys (e.g. "ISOLATION LEVEL ...") are matched word-by-word via a trie.
        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 NESTED_TYPE_TOKENS = { 106 TokenType.ARRAY, 107 TokenType.MAP, 108 TokenType.NULLABLE, 109 TokenType.STRUCT, 110 } 111 112 ENUM_TYPE_TOKENS = { 113 TokenType.ENUM, 114 } 115 116 TYPE_TOKENS = { 117 TokenType.BIT, 118 TokenType.BOOLEAN, 119 TokenType.TINYINT, 120 TokenType.UTINYINT, 121 TokenType.SMALLINT, 122 TokenType.USMALLINT, 123 TokenType.INT, 124 TokenType.UINT, 125 TokenType.BIGINT, 126 TokenType.UBIGINT, 127 TokenType.INT128, 128 TokenType.UINT128, 129 TokenType.INT256, 130 TokenType.UINT256, 131 TokenType.FLOAT, 132 TokenType.DOUBLE, 133 TokenType.CHAR, 134 TokenType.NCHAR, 135 
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.ENUM,
        *NESTED_TYPE_TOKENS,
    }

    # Subquery predicates, e.g. x = ANY (SELECT ...). SOME is an alias for ANY.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    # Object kinds that live inside a database/schema namespace.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    # All object kinds that can follow CREATE / DROP / COMMENT ON.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Identifiers allowed after INTERVAL; END is excluded (ambiguous with CASE ... END).
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifiers usable as table aliases; the removed tokens would be ambiguous
    # with join/lateral/window syntax that can follow a table reference.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may be followed by a parenthesized argument list (function calls).
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary operator tables, one per precedence level; each maps token -> expression class.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Lambda syntaxes: `x -> expr` builds a Lambda, `x => expr` builds a Kwarg.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Operators that can follow a column reference: casts (::) and JSON(B) extraction.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Entry points used by parse_into() to parse directly into a given Expression type.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # First-token dispatch for whole statements.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        # A bare FROM clause is treated as SELECT * FROM ...
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/terminal parsers; each callback receives the matched token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operators that can follow an expression, e.g. BETWEEN, IN, LIKE.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # CREATE-statement property parsers, keyed by (possibly multi-word) keyword.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
"DISTKEY": lambda self: self._parse_distkey(), 597 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 598 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 599 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 600 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 601 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 602 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 603 "FREESPACE": lambda self: self._parse_freespace(), 604 "IMMUTABLE": lambda self: self.expression( 605 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 606 ), 607 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 608 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 609 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 610 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 611 "LIKE": lambda self: self._parse_create_like(), 612 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 613 "LOCK": lambda self: self._parse_locking(), 614 "LOCKING": lambda self: self._parse_locking(), 615 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 616 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 617 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 618 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 619 "NO": lambda self: self._parse_no_property(), 620 "ON": lambda self: self._parse_on_property(), 621 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 622 "PARTITION BY": lambda self: self._parse_partitioned_by(), 623 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 624 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 625 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 626 "RANGE": lambda self: 
self._parse_dict_range(this="RANGE"), 627 "RETURNS": lambda self: self._parse_returns(), 628 "ROW": lambda self: self._parse_row(), 629 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 630 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 631 "SETTINGS": lambda self: self.expression( 632 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 633 ), 634 "SORTKEY": lambda self: self._parse_sortkey(), 635 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 636 "STABLE": lambda self: self.expression( 637 exp.StabilityProperty, this=exp.Literal.string("STABLE") 638 ), 639 "STORED": lambda self: self._parse_stored(), 640 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 641 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 642 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 643 "TO": lambda self: self._parse_to_table(), 644 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 645 "TTL": lambda self: self._parse_ttl(), 646 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 647 "VOLATILE": lambda self: self._parse_volatile_property(), 648 "WITH": lambda self: self._parse_with_property(), 649 } 650 651 CONSTRAINT_PARSERS = { 652 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 653 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 654 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 655 "CHARACTER SET": lambda self: self.expression( 656 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 657 ), 658 "CHECK": lambda self: self.expression( 659 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 660 ), 661 "COLLATE": lambda self: self.expression( 662 exp.CollateColumnConstraint, this=self._parse_var() 663 ), 664 "COMMENT": lambda self: self.expression( 665 exp.CommentColumnConstraint, 
this=self._parse_string() 666 ), 667 "COMPRESS": lambda self: self._parse_compress(), 668 "DEFAULT": lambda self: self.expression( 669 exp.DefaultColumnConstraint, this=self._parse_bitwise() 670 ), 671 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 672 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 673 "FORMAT": lambda self: self.expression( 674 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 675 ), 676 "GENERATED": lambda self: self._parse_generated_as_identity(), 677 "IDENTITY": lambda self: self._parse_auto_increment(), 678 "INLINE": lambda self: self._parse_inline(), 679 "LIKE": lambda self: self._parse_create_like(), 680 "NOT": lambda self: self._parse_not_constraint(), 681 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 682 "ON": lambda self: self._match(TokenType.UPDATE) 683 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 684 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 685 "PRIMARY KEY": lambda self: self._parse_primary_key(), 686 "REFERENCES": lambda self: self._parse_references(match=False), 687 "TITLE": lambda self: self.expression( 688 exp.TitleColumnConstraint, this=self._parse_var_or_string() 689 ), 690 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 691 "UNIQUE": lambda self: self._parse_unique(), 692 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 693 } 694 695 ALTER_PARSERS = { 696 "ADD": lambda self: self._parse_alter_table_add(), 697 "ALTER": lambda self: self._parse_alter_table_alter(), 698 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 699 "DROP": lambda self: self._parse_alter_table_drop(), 700 "RENAME": lambda self: self._parse_alter_table_rename(), 701 } 702 703 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 704 705 
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument lists need bespoke parsing, e.g. CAST(x AS type).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Post-SELECT clause parsers; iter(callable, None) collects until the parser
    # returns None (i.e. no more joins/laterals to consume).
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect behavior flags; subclasses override these.
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
811 "_tokens", 812 "_index", 813 "_curr", 814 "_next", 815 "_prev", 816 "_prev_comments", 817 ) 818 819 # Autofilled 820 INDEX_OFFSET: int = 0 821 UNNEST_COLUMN_ONLY: bool = False 822 ALIAS_POST_TABLESAMPLE: bool = False 823 STRICT_STRING_CONCAT = False 824 NULL_ORDERING: str = "nulls_are_small" 825 SHOW_TRIE: t.Dict = {} 826 SET_TRIE: t.Dict = {} 827 FORMAT_MAPPING: t.Dict[str, str] = {} 828 FORMAT_TRIE: t.Dict = {} 829 TIME_MAPPING: t.Dict[str, str] = {} 830 TIME_TRIE: t.Dict = {} 831 832 def __init__( 833 self, 834 error_level: t.Optional[ErrorLevel] = None, 835 error_message_context: int = 100, 836 max_errors: int = 3, 837 ): 838 self.error_level = error_level or ErrorLevel.IMMEDIATE 839 self.error_message_context = error_message_context 840 self.max_errors = max_errors 841 self.reset() 842 843 def reset(self): 844 self.sql = "" 845 self.errors = [] 846 self._tokens = [] 847 self._index = 0 848 self._curr = None 849 self._next = None 850 self._prev = None 851 self._prev_comments = None 852 853 def parse( 854 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 855 ) -> t.List[t.Optional[exp.Expression]]: 856 """ 857 Parses a list of tokens and returns a list of syntax trees, one tree 858 per parsed SQL statement. 859 860 Args: 861 raw_tokens: The list of tokens. 862 sql: The original SQL string, used to produce helpful debug messages. 863 864 Returns: 865 The list of the produced syntax trees. 866 """ 867 return self._parse( 868 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 869 ) 870 871 def parse_into( 872 self, 873 expression_types: exp.IntoType, 874 raw_tokens: t.List[Token], 875 sql: t.Optional[str] = None, 876 ) -> t.List[t.Optional[exp.Expression]]: 877 """ 878 Parses a list of tokens into a given Expression type. If a collection of Expression 879 types is given instead, this method will try to parse the token list into each one 880 of them, stopping at the first for which the parsing succeeds. 
881 882 Args: 883 expression_types: The expression type(s) to try and parse the token list into. 884 raw_tokens: The list of tokens. 885 sql: The original SQL string, used to produce helpful debug messages. 886 887 Returns: 888 The target Expression. 889 """ 890 errors = [] 891 for expression_type in ensure_list(expression_types): 892 parser = self.EXPRESSION_PARSERS.get(expression_type) 893 if not parser: 894 raise TypeError(f"No parser registered for {expression_type}") 895 896 try: 897 return self._parse(parser, raw_tokens, sql) 898 except ParseError as e: 899 e.errors[0]["into_expression"] = expression_type 900 errors.append(e) 901 902 raise ParseError( 903 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 904 errors=merge_errors(errors), 905 ) from errors[-1] 906 907 def _parse( 908 self, 909 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 910 raw_tokens: t.List[Token], 911 sql: t.Optional[str] = None, 912 ) -> t.List[t.Optional[exp.Expression]]: 913 self.reset() 914 self.sql = sql or "" 915 916 total = len(raw_tokens) 917 chunks: t.List[t.List[Token]] = [[]] 918 919 for i, token in enumerate(raw_tokens): 920 if token.token_type == TokenType.SEMICOLON: 921 if i < total - 1: 922 chunks.append([]) 923 else: 924 chunks[-1].append(token) 925 926 expressions = [] 927 928 for tokens in chunks: 929 self._index = -1 930 self._tokens = tokens 931 self._advance() 932 933 expressions.append(parse_method(self)) 934 935 if self._index < len(self._tokens): 936 self.raise_error("Invalid expression / Unexpected token") 937 938 self.check_errors() 939 940 return expressions 941 942 def check_errors(self) -> None: 943 """Logs or raises any found errors, depending on the chosen error level setting.""" 944 if self.error_level == ErrorLevel.WARN: 945 for error in self.errors: 946 logger.error(str(error)) 947 elif self.error_level == ErrorLevel.RAISE and self.errors: 948 raise ParseError( 949 concat_messages(self.errors, self.max_errors), 950 
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach explicit comments if given; otherwise move over any buffered token comments.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Move comments buffered from the previous token onto this expression.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.
1007 1008 Args: 1009 expression: The expression to validate. 1010 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1011 1012 Returns: 1013 The validated expression. 1014 """ 1015 if self.error_level != ErrorLevel.IGNORE: 1016 for error_message in expression.error_messages(args): 1017 self.raise_error(error_message) 1018 1019 return expression 1020 1021 def _find_sql(self, start: Token, end: Token) -> str: 1022 return self.sql[start.start : end.end + 1] 1023 1024 def _advance(self, times: int = 1) -> None: 1025 self._index += times 1026 self._curr = seq_get(self._tokens, self._index) 1027 self._next = seq_get(self._tokens, self._index + 1) 1028 1029 if self._index > 0: 1030 self._prev = self._tokens[self._index - 1] 1031 self._prev_comments = self._prev.comments 1032 else: 1033 self._prev = None 1034 self._prev_comments = None 1035 1036 def _retreat(self, index: int) -> None: 1037 if index != self._index: 1038 self._advance(index - self._index) 1039 1040 def _parse_command(self) -> exp.Command: 1041 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1042 1043 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1044 start = self._prev 1045 exists = self._parse_exists() if allow_exists else None 1046 1047 self._match(TokenType.ON) 1048 1049 kind = self._match_set(self.CREATABLES) and self._prev 1050 if not kind: 1051 return self._parse_as_command(start) 1052 1053 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1054 this = self._parse_user_defined_function(kind=kind.token_type) 1055 elif kind.token_type == TokenType.TABLE: 1056 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1057 elif kind.token_type == TokenType.COLUMN: 1058 this = self._parse_column() 1059 else: 1060 this = self._parse_id_var() 1061 1062 self._match(TokenType.IS) 1063 1064 return self.expression( 1065 exp.Comment, this=this, kind=kind.text, 
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause: a CSV of TTL actions,
        followed by optional WHERE / GROUP BY [SET ...] parts."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each action is an expression optionally followed by exactly one
            # of DELETE / RECOMPRESS / TO DISK / TO VOLUME.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            # Bare expression: no action keyword followed.
            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # SET aggregates are only meaningful when a GROUP BY was present.
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
self._parse_query_modifiers(expression) 1125 1126 def _parse_drop(self) -> exp.Drop | exp.Command: 1127 start = self._prev 1128 temporary = self._match(TokenType.TEMPORARY) 1129 materialized = self._match_text_seq("MATERIALIZED") 1130 1131 kind = self._match_set(self.CREATABLES) and self._prev.text 1132 if not kind: 1133 return self._parse_as_command(start) 1134 1135 return self.expression( 1136 exp.Drop, 1137 exists=self._parse_exists(), 1138 this=self._parse_table(schema=True), 1139 kind=kind, 1140 temporary=temporary, 1141 materialized=materialized, 1142 cascade=self._match_text_seq("CASCADE"), 1143 constraints=self._match_text_seq("CONSTRAINTS"), 1144 purge=self._match_text_seq("PURGE"), 1145 ) 1146 1147 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1148 return ( 1149 self._match(TokenType.IF) 1150 and (not not_ or self._match(TokenType.NOT)) 1151 and self._match(TokenType.EXISTS) 1152 ) 1153 1154 def _parse_create(self) -> exp.Create | exp.Command: 1155 # Note: this can't be None because we've matched a statement parser 1156 start = self._prev 1157 replace = start.text.upper() == "REPLACE" or self._match_pair( 1158 TokenType.OR, TokenType.REPLACE 1159 ) 1160 unique = self._match(TokenType.UNIQUE) 1161 1162 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1163 self._advance() 1164 1165 properties = None 1166 create_token = self._match_set(self.CREATABLES) and self._prev 1167 1168 if not create_token: 1169 # exp.Properties.Location.POST_CREATE 1170 properties = self._parse_properties() 1171 create_token = self._match_set(self.CREATABLES) and self._prev 1172 1173 if not properties or not create_token: 1174 return self._parse_as_command(start) 1175 1176 exists = self._parse_exists(not_=True) 1177 this = None 1178 expression = None 1179 indexes = None 1180 no_schema_binding = None 1181 begin = None 1182 clone = None 1183 1184 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1185 nonlocal properties 1186 if 
properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            # A RETURN-keyword body is wrapped so the AST preserves the keyword.
            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    # NOTE(review): properties are probed even on the final pass
                    # (when no index follows), so trailing options still attach.
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = 
self._parse_table(schema=True) 1244 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1245 clone_kind = ( 1246 self._match(TokenType.L_PAREN) 1247 and self._match_texts(self.CLONE_KINDS) 1248 and self._prev.text.upper() 1249 ) 1250 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1251 self._match(TokenType.R_PAREN) 1252 clone = self.expression( 1253 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1254 ) 1255 1256 return self.expression( 1257 exp.Create, 1258 this=this, 1259 kind=create_token.text, 1260 replace=replace, 1261 unique=unique, 1262 expression=expression, 1263 exists=exists, 1264 properties=properties, 1265 indexes=indexes, 1266 no_schema_binding=no_schema_binding, 1267 begin=begin, 1268 clone=clone, 1269 ) 1270 1271 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1272 # only used for teradata currently 1273 self._match(TokenType.COMMA) 1274 1275 kwargs = { 1276 "no": self._match_text_seq("NO"), 1277 "dual": self._match_text_seq("DUAL"), 1278 "before": self._match_text_seq("BEFORE"), 1279 "default": self._match_text_seq("DEFAULT"), 1280 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1281 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1282 "after": self._match_text_seq("AFTER"), 1283 "minimum": self._match_texts(("MIN", "MINIMUM")), 1284 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1285 } 1286 1287 if self._match_texts(self.PROPERTY_PARSERS): 1288 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1289 try: 1290 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1291 except TypeError: 1292 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1293 1294 return None 1295 1296 def _parse_property(self) -> t.Optional[exp.Expression]: 1297 if self._match_texts(self.PROPERTY_PARSERS): 1298 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1299 1300 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 
1301 return self._parse_character_set(default=True) 1302 1303 if self._match_text_seq("COMPOUND", "SORTKEY"): 1304 return self._parse_sortkey(compound=True) 1305 1306 if self._match_text_seq("SQL", "SECURITY"): 1307 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1308 1309 assignment = self._match_pair( 1310 TokenType.VAR, TokenType.EQ, advance=False 1311 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1312 1313 if assignment: 1314 key = self._parse_var_or_string() 1315 self._match(TokenType.EQ) 1316 return self.expression(exp.Property, this=key, value=self._parse_column()) 1317 1318 return None 1319 1320 def _parse_stored(self) -> exp.FileFormatProperty: 1321 self._match(TokenType.ALIAS) 1322 1323 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1324 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1325 1326 return self.expression( 1327 exp.FileFormatProperty, 1328 this=self.expression( 1329 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1330 ) 1331 if input_format or output_format 1332 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1333 ) 1334 1335 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1336 self._match(TokenType.EQ) 1337 self._match(TokenType.ALIAS) 1338 return self.expression(exp_class, this=self._parse_field()) 1339 1340 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1341 properties = [] 1342 while True: 1343 if before: 1344 prop = self._parse_property_before() 1345 else: 1346 prop = self._parse_property() 1347 1348 if not prop: 1349 break 1350 for p in ensure_list(prop): 1351 properties.append(p) 1352 1353 if properties: 1354 return self.expression(exp.Properties, expressions=properties) 1355 1356 return None 1357 1358 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1359 
    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        # Dispatches the various WITH-prefixed table properties. The probes
        # below are order-sensitive: each may consume tokens on failure paths.
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            # WITH (prop, prop, ...) — a parenthesized property list.
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            # Nothing follows WITH, so there is no property body to parse.
            return None

        return self._parse_withisolatedloading()
_parse_journal(self, **kwargs) -> exp.JournalProperty: 1416 return self.expression(exp.JournalProperty, **kwargs) 1417 1418 def _parse_checksum(self) -> exp.ChecksumProperty: 1419 self._match(TokenType.EQ) 1420 1421 on = None 1422 if self._match(TokenType.ON): 1423 on = True 1424 elif self._match_text_seq("OFF"): 1425 on = False 1426 1427 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1428 1429 def _parse_cluster(self) -> t.Optional[exp.Cluster]: 1430 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1431 1432 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1433 if not self._match_text_seq("GRANTS"): 1434 self._retreat(self._index - 1) 1435 return None 1436 1437 return self.expression(exp.CopyGrantsProperty) 1438 1439 def _parse_freespace(self) -> exp.FreespaceProperty: 1440 self._match(TokenType.EQ) 1441 return self.expression( 1442 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1443 ) 1444 1445 def _parse_mergeblockratio( 1446 self, no: bool = False, default: bool = False 1447 ) -> exp.MergeBlockRatioProperty: 1448 if self._match(TokenType.EQ): 1449 return self.expression( 1450 exp.MergeBlockRatioProperty, 1451 this=self._parse_number(), 1452 percent=self._match(TokenType.PERCENT), 1453 ) 1454 1455 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1456 1457 def _parse_datablocksize( 1458 self, 1459 default: t.Optional[bool] = None, 1460 minimum: t.Optional[bool] = None, 1461 maximum: t.Optional[bool] = None, 1462 ) -> exp.DataBlocksizeProperty: 1463 self._match(TokenType.EQ) 1464 size = self._parse_number() 1465 1466 units = None 1467 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1468 units = self._prev.text 1469 1470 return self.expression( 1471 exp.DataBlocksizeProperty, 1472 size=size, 1473 units=units, 1474 default=default, 1475 minimum=minimum, 1476 maximum=maximum, 1477 ) 1478 
1479 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1480 self._match(TokenType.EQ) 1481 always = self._match_text_seq("ALWAYS") 1482 manual = self._match_text_seq("MANUAL") 1483 never = self._match_text_seq("NEVER") 1484 default = self._match_text_seq("DEFAULT") 1485 1486 autotemp = None 1487 if self._match_text_seq("AUTOTEMP"): 1488 autotemp = self._parse_schema() 1489 1490 return self.expression( 1491 exp.BlockCompressionProperty, 1492 always=always, 1493 manual=manual, 1494 never=never, 1495 default=default, 1496 autotemp=autotemp, 1497 ) 1498 1499 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1500 no = self._match_text_seq("NO") 1501 concurrent = self._match_text_seq("CONCURRENT") 1502 self._match_text_seq("ISOLATED", "LOADING") 1503 for_all = self._match_text_seq("FOR", "ALL") 1504 for_insert = self._match_text_seq("FOR", "INSERT") 1505 for_none = self._match_text_seq("FOR", "NONE") 1506 return self.expression( 1507 exp.IsolatedLoadingProperty, 1508 no=no, 1509 concurrent=concurrent, 1510 for_all=for_all, 1511 for_insert=for_insert, 1512 for_none=for_none, 1513 ) 1514 1515 def _parse_locking(self) -> exp.LockingProperty: 1516 if self._match(TokenType.TABLE): 1517 kind = "TABLE" 1518 elif self._match(TokenType.VIEW): 1519 kind = "VIEW" 1520 elif self._match(TokenType.ROW): 1521 kind = "ROW" 1522 elif self._match_text_seq("DATABASE"): 1523 kind = "DATABASE" 1524 else: 1525 kind = None 1526 1527 if kind in ("DATABASE", "TABLE", "VIEW"): 1528 this = self._parse_table_parts() 1529 else: 1530 this = None 1531 1532 if self._match(TokenType.FOR): 1533 for_or_in = "FOR" 1534 elif self._match(TokenType.IN): 1535 for_or_in = "IN" 1536 else: 1537 for_or_in = None 1538 1539 if self._match_text_seq("ACCESS"): 1540 lock_type = "ACCESS" 1541 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1542 lock_type = "EXCLUSIVE" 1543 elif self._match_text_seq("SHARE"): 1544 lock_type = "SHARE" 1545 elif self._match_text_seq("READ"): 1546 
lock_type = "READ" 1547 elif self._match_text_seq("WRITE"): 1548 lock_type = "WRITE" 1549 elif self._match_text_seq("CHECKSUM"): 1550 lock_type = "CHECKSUM" 1551 else: 1552 lock_type = None 1553 1554 override = self._match_text_seq("OVERRIDE") 1555 1556 return self.expression( 1557 exp.LockingProperty, 1558 this=this, 1559 kind=kind, 1560 for_or_in=for_or_in, 1561 lock_type=lock_type, 1562 override=override, 1563 ) 1564 1565 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1566 if self._match(TokenType.PARTITION_BY): 1567 return self._parse_csv(self._parse_conjunction) 1568 return [] 1569 1570 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1571 self._match(TokenType.EQ) 1572 return self.expression( 1573 exp.PartitionedByProperty, 1574 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1575 ) 1576 1577 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1578 if self._match_text_seq("AND", "STATISTICS"): 1579 statistics = True 1580 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1581 statistics = False 1582 else: 1583 statistics = None 1584 1585 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1586 1587 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1588 if self._match_text_seq("PRIMARY", "INDEX"): 1589 return exp.NoPrimaryIndexProperty() 1590 return None 1591 1592 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1593 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1594 return exp.OnCommitProperty() 1595 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1596 return exp.OnCommitProperty(delete=True) 1597 return None 1598 1599 def _parse_distkey(self) -> exp.DistKeyProperty: 1600 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1601 1602 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1603 table = self._parse_table(schema=True) 1604 1605 options = [] 1606 while 
    def _parse_returns(self) -> exp.ReturnsProperty:
        # Parses a RETURNS clause for a user-defined function: either a scalar
        # type or a TABLE signature (angle-bracketed or schema-style columns).
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<col type, ...> — angle-bracketed struct syntax;
                # the closing `>` is mandatory.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                # RETURNS TABLE (col type, ...) — parenthesized schema syntax.
                value = self._parse_schema(exp.var("TABLE"))
        else:
            # Scalar return type.
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        # Handles both ON CONFLICT and ON DUPLICATE KEY forms; the two probes
        # are order-sensitive and at most one can match.
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a key/value list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        # Action: DO NOTHING, or [DO] UPDATE SET <assignments>.
        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
    def _parse_load(self) -> exp.LoadData | exp.Command:
        # LOAD DATA [LOCAL] INPATH '...' [OVERWRITE] INTO TABLE ... (the
        # INPUTFORMAT/SERDE suffixes suggest Hive-style syntax); anything else
        # after LOAD is preserved verbatim as a generic command.
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
exp.Delete, 1785 this=self._parse_table(), 1786 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1787 where=self._parse_where(), 1788 returning=self._parse_returning(), 1789 limit=self._parse_limit(), 1790 ) 1791 1792 def _parse_update(self) -> exp.Update: 1793 return self.expression( 1794 exp.Update, 1795 **{ # type: ignore 1796 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1797 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1798 "from": self._parse_from(modifiers=True), 1799 "where": self._parse_where(), 1800 "returning": self._parse_returning(), 1801 "limit": self._parse_limit(), 1802 }, 1803 ) 1804 1805 def _parse_uncache(self) -> exp.Uncache: 1806 if not self._match(TokenType.TABLE): 1807 self.raise_error("Expecting TABLE after UNCACHE") 1808 1809 return self.expression( 1810 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1811 ) 1812 1813 def _parse_cache(self) -> exp.Cache: 1814 lazy = self._match_text_seq("LAZY") 1815 self._match(TokenType.TABLE) 1816 table = self._parse_table(schema=True) 1817 1818 options = [] 1819 if self._match_text_seq("OPTIONS"): 1820 self._match_l_paren() 1821 k = self._parse_string() 1822 self._match(TokenType.EQ) 1823 v = self._parse_string() 1824 options = [k, v] 1825 self._match_r_paren() 1826 1827 self._match(TokenType.ALIAS) 1828 return self.expression( 1829 exp.Cache, 1830 this=table, 1831 lazy=lazy, 1832 options=options, 1833 expression=self._parse_select(nested=True), 1834 ) 1835 1836 def _parse_partition(self) -> t.Optional[exp.Partition]: 1837 if not self._match(TokenType.PARTITION): 1838 return None 1839 1840 return self.expression( 1841 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1842 ) 1843 1844 def _parse_value(self) -> exp.Tuple: 1845 if self._match(TokenType.L_PAREN): 1846 expressions = self._parse_csv(self._parse_conjunction) 1847 self._match_r_paren() 1848 
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        # Entry point for SELECT-like constructs: CTEs, plain SELECTs,
        # parenthesized subqueries, and VALUES lists.
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            # Attach the CTE to the following statement when it supports one.
            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # BigQuery-style SELECT AS STRUCT / SELECT AS VALUE — TODO confirm
            # which dialects feed this; the text is kept as the select `kind`.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this =
self._parse_simplified_pivot() 1917 elif self._match(TokenType.FROM): 1918 this = exp.select("*").from_( 1919 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1920 ) 1921 else: 1922 this = self._parse_table() if table else self._parse_select(nested=True) 1923 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1924 1925 self._match_r_paren() 1926 1927 # early return so that subquery unions aren't parsed again 1928 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1929 # Union ALL should be a property of the top select node, not the subquery 1930 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1931 elif self._match(TokenType.VALUES): 1932 this = self.expression( 1933 exp.Values, 1934 expressions=self._parse_csv(self._parse_value), 1935 alias=self._parse_table_alias(), 1936 ) 1937 else: 1938 this = None 1939 1940 return self._parse_set_operations(this) 1941 1942 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 1943 if not skip_with_token and not self._match(TokenType.WITH): 1944 return None 1945 1946 comments = self._prev_comments 1947 recursive = self._match(TokenType.RECURSIVE) 1948 1949 expressions = [] 1950 while True: 1951 expressions.append(self._parse_cte()) 1952 1953 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1954 break 1955 else: 1956 self._match(TokenType.WITH) 1957 1958 return self.expression( 1959 exp.With, comments=comments, expressions=expressions, recursive=recursive 1960 ) 1961 1962 def _parse_cte(self) -> exp.CTE: 1963 alias = self._parse_table_alias() 1964 if not alias or not alias.this: 1965 self.raise_error("Expected CTE to have alias") 1966 1967 self._match(TokenType.ALIAS) 1968 return self.expression( 1969 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 1970 ) 1971 1972 def _parse_table_alias( 1973 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1974 ) -> t.Optional[exp.TableAlias]: 1975 any_token = 
    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Applies each registered query modifier (see QUERY_MODIFIER_PARSERS)
        # to a modifiable expression; non-modifiables pass through untouched.
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        # The limit parser may bundle an offset (e.g. LIMIT x, y);
                        # re-home it onto the expression's own `offset` arg.
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this
2041 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2042 ) 2043 2044 def _parse_from( 2045 self, modifiers: bool = False, skip_from_token: bool = False 2046 ) -> t.Optional[exp.From]: 2047 if not skip_from_token and not self._match(TokenType.FROM): 2048 return None 2049 2050 comments = self._prev_comments 2051 this = self._parse_table() 2052 2053 return self.expression( 2054 exp.From, 2055 comments=comments, 2056 this=self._parse_query_modifiers(this) if modifiers else this, 2057 ) 2058 2059 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2060 if not self._match(TokenType.MATCH_RECOGNIZE): 2061 return None 2062 2063 self._match_l_paren() 2064 2065 partition = self._parse_partition_by() 2066 order = self._parse_order() 2067 measures = ( 2068 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2069 ) 2070 2071 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2072 rows = exp.var("ONE ROW PER MATCH") 2073 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2074 text = "ALL ROWS PER MATCH" 2075 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2076 text += f" SHOW EMPTY MATCHES" 2077 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2078 text += f" OMIT EMPTY MATCHES" 2079 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2080 text += f" WITH UNMATCHED ROWS" 2081 rows = exp.var(text) 2082 else: 2083 rows = None 2084 2085 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2086 text = "AFTER MATCH SKIP" 2087 if self._match_text_seq("PAST", "LAST", "ROW"): 2088 text += f" PAST LAST ROW" 2089 elif self._match_text_seq("TO", "NEXT", "ROW"): 2090 text += f" TO NEXT ROW" 2091 elif self._match_text_seq("TO", "FIRST"): 2092 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2093 elif self._match_text_seq("TO", "LAST"): 2094 text += f" TO LAST {self._advance_any().text}" # type: ignore 2095 after = exp.var(text) 2096 else: 2097 after = None 2098 2099 if 
self._match_text_seq("PATTERN"): 2100 self._match_l_paren() 2101 2102 if not self._curr: 2103 self.raise_error("Expecting )", self._curr) 2104 2105 paren = 1 2106 start = self._curr 2107 2108 while self._curr and paren > 0: 2109 if self._curr.token_type == TokenType.L_PAREN: 2110 paren += 1 2111 if self._curr.token_type == TokenType.R_PAREN: 2112 paren -= 1 2113 2114 end = self._prev 2115 self._advance() 2116 2117 if paren > 0: 2118 self.raise_error("Expecting )", self._curr) 2119 2120 pattern = exp.var(self._find_sql(start, end)) 2121 else: 2122 pattern = None 2123 2124 define = ( 2125 self._parse_csv( 2126 lambda: self.expression( 2127 exp.Alias, 2128 alias=self._parse_id_var(any_token=True), 2129 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2130 ) 2131 ) 2132 if self._match_text_seq("DEFINE") 2133 else None 2134 ) 2135 2136 self._match_r_paren() 2137 2138 return self.expression( 2139 exp.MatchRecognize, 2140 partition_by=partition, 2141 order=order, 2142 measures=measures, 2143 rows=rows, 2144 after=after, 2145 pattern=pattern, 2146 define=define, 2147 alias=self._parse_table_alias(), 2148 ) 2149 2150 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2151 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2152 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2153 2154 if outer_apply or cross_apply: 2155 this = self._parse_select(table=True) 2156 view = None 2157 outer = not cross_apply 2158 elif self._match(TokenType.LATERAL): 2159 this = self._parse_select(table=True) 2160 view = self._match(TokenType.VIEW) 2161 outer = self._match(TokenType.OUTER) 2162 else: 2163 return None 2164 2165 if not this: 2166 this = self._parse_function() or self._parse_id_var(any_token=False) 2167 while self._match(TokenType.DOT): 2168 this = exp.Dot( 2169 this=this, 2170 expression=self._parse_function() or self._parse_id_var(any_token=False), 2171 ) 2172 2173 if view: 2174 table = self._parse_id_var(any_token=False) 2175 columns 
= self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2176 table_alias: t.Optional[exp.TableAlias] = self.expression( 2177 exp.TableAlias, this=table, columns=columns 2178 ) 2179 elif isinstance(this, exp.Subquery) and this.alias: 2180 # Ensures parity between the Subquery's and the Lateral's "alias" args 2181 table_alias = this.args["alias"].copy() 2182 else: 2183 table_alias = self._parse_table_alias() 2184 2185 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2186 2187 def _parse_join_parts( 2188 self, 2189 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2190 return ( 2191 self._match_set(self.JOIN_METHODS) and self._prev, 2192 self._match_set(self.JOIN_SIDES) and self._prev, 2193 self._match_set(self.JOIN_KINDS) and self._prev, 2194 ) 2195 2196 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2197 if self._match(TokenType.COMMA): 2198 return self.expression(exp.Join, this=self._parse_table()) 2199 2200 index = self._index 2201 method, side, kind = self._parse_join_parts() 2202 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2203 join = self._match(TokenType.JOIN) 2204 2205 if not skip_join_token and not join: 2206 self._retreat(index) 2207 kind = None 2208 method = None 2209 side = None 2210 2211 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2212 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2213 2214 if not skip_join_token and not join and not outer_apply and not cross_apply: 2215 return None 2216 2217 if outer_apply: 2218 side = Token(TokenType.LEFT, "LEFT") 2219 2220 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2221 2222 if method: 2223 kwargs["method"] = method.text 2224 if side: 2225 kwargs["side"] = side.text 2226 if kind: 2227 kwargs["kind"] = kind.text 2228 if hint: 2229 kwargs["hint"] = hint 2230 2231 if self._match(TokenType.ON): 2232 kwargs["on"] = 
self._parse_conjunction() 2233 elif self._match(TokenType.USING): 2234 kwargs["using"] = self._parse_wrapped_id_vars() 2235 2236 return self.expression(exp.Join, **kwargs) 2237 2238 def _parse_index( 2239 self, 2240 index: t.Optional[exp.Expression] = None, 2241 ) -> t.Optional[exp.Index]: 2242 if index: 2243 unique = None 2244 primary = None 2245 amp = None 2246 2247 self._match(TokenType.ON) 2248 self._match(TokenType.TABLE) # hive 2249 table = self._parse_table_parts(schema=True) 2250 else: 2251 unique = self._match(TokenType.UNIQUE) 2252 primary = self._match_text_seq("PRIMARY") 2253 amp = self._match_text_seq("AMP") 2254 2255 if not self._match(TokenType.INDEX): 2256 return None 2257 2258 index = self._parse_id_var() 2259 table = None 2260 2261 using = self._parse_field() if self._match(TokenType.USING) else None 2262 2263 if self._match(TokenType.L_PAREN, advance=False): 2264 columns = self._parse_wrapped_csv(self._parse_ordered) 2265 else: 2266 columns = None 2267 2268 return self.expression( 2269 exp.Index, 2270 this=index, 2271 table=table, 2272 using=using, 2273 columns=columns, 2274 unique=unique, 2275 primary=primary, 2276 amp=amp, 2277 partition_by=self._parse_partition_by(), 2278 ) 2279 2280 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2281 hints: t.List[exp.Expression] = [] 2282 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2283 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2284 hints.append( 2285 self.expression( 2286 exp.WithTableHint, 2287 expressions=self._parse_csv( 2288 lambda: self._parse_function() or self._parse_var(any_token=True) 2289 ), 2290 ) 2291 ) 2292 self._match_r_paren() 2293 else: 2294 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2295 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2296 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2297 2298 self._match_texts({"INDEX", "KEY"}) 2299 if self._match(TokenType.FOR): 2300 
hint.set("target", self._advance_any() and self._prev.text.upper()) 2301 2302 hint.set("expressions", self._parse_wrapped_id_vars()) 2303 hints.append(hint) 2304 2305 return hints or None 2306 2307 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2308 return ( 2309 (not schema and self._parse_function(optional_parens=False)) 2310 or self._parse_id_var(any_token=False) 2311 or self._parse_string_as_identifier() 2312 or self._parse_placeholder() 2313 ) 2314 2315 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2316 catalog = None 2317 db = None 2318 table = self._parse_table_part(schema=schema) 2319 2320 while self._match(TokenType.DOT): 2321 if catalog: 2322 # This allows nesting the table in arbitrarily many dot expressions if needed 2323 table = self.expression( 2324 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2325 ) 2326 else: 2327 catalog = db 2328 db = table 2329 table = self._parse_table_part(schema=schema) 2330 2331 if not table: 2332 self.raise_error(f"Expected table name but got {self._curr}") 2333 2334 return self.expression( 2335 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2336 ) 2337 2338 def _parse_table( 2339 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2340 ) -> t.Optional[exp.Expression]: 2341 lateral = self._parse_lateral() 2342 if lateral: 2343 return lateral 2344 2345 unnest = self._parse_unnest() 2346 if unnest: 2347 return unnest 2348 2349 values = self._parse_derived_table_values() 2350 if values: 2351 return values 2352 2353 subquery = self._parse_select(table=True) 2354 if subquery: 2355 if not subquery.args.get("pivots"): 2356 subquery.set("pivots", self._parse_pivots()) 2357 return subquery 2358 2359 this: exp.Expression = self._parse_table_parts(schema=schema) 2360 2361 if schema: 2362 return self._parse_schema(this=this) 2363 2364 if self.ALIAS_POST_TABLESAMPLE: 2365 table_sample = 
self._parse_table_sample() 2366 2367 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2368 if alias: 2369 this.set("alias", alias) 2370 2371 if not this.args.get("pivots"): 2372 this.set("pivots", self._parse_pivots()) 2373 2374 this.set("hints", self._parse_table_hints()) 2375 2376 if not self.ALIAS_POST_TABLESAMPLE: 2377 table_sample = self._parse_table_sample() 2378 2379 if table_sample: 2380 table_sample.set("this", this) 2381 this = table_sample 2382 2383 return this 2384 2385 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2386 if not self._match(TokenType.UNNEST): 2387 return None 2388 2389 expressions = self._parse_wrapped_csv(self._parse_type) 2390 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2391 2392 alias = self._parse_table_alias() if with_alias else None 2393 2394 if alias and self.UNNEST_COLUMN_ONLY: 2395 if alias.args.get("columns"): 2396 self.raise_error("Unexpected extra column alias in unnest.") 2397 2398 alias.set("columns", [alias.this]) 2399 alias.set("this", None) 2400 2401 offset = None 2402 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2403 self._match(TokenType.ALIAS) 2404 offset = self._parse_id_var() or exp.to_identifier("offset") 2405 2406 return self.expression( 2407 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2408 ) 2409 2410 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2411 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2412 if not is_derived and not self._match(TokenType.VALUES): 2413 return None 2414 2415 expressions = self._parse_csv(self._parse_value) 2416 alias = self._parse_table_alias() 2417 2418 if is_derived: 2419 self._match_r_paren() 2420 2421 return self.expression( 2422 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2423 ) 2424 2425 def _parse_table_sample(self, as_modifier: bool = False) -> 
t.Optional[exp.TableSample]: 2426 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2427 as_modifier and self._match_text_seq("USING", "SAMPLE") 2428 ): 2429 return None 2430 2431 bucket_numerator = None 2432 bucket_denominator = None 2433 bucket_field = None 2434 percent = None 2435 rows = None 2436 size = None 2437 seed = None 2438 2439 kind = ( 2440 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2441 ) 2442 method = self._parse_var(tokens=(TokenType.ROW,)) 2443 2444 self._match(TokenType.L_PAREN) 2445 2446 num = self._parse_number() 2447 2448 if self._match_text_seq("BUCKET"): 2449 bucket_numerator = self._parse_number() 2450 self._match_text_seq("OUT", "OF") 2451 bucket_denominator = bucket_denominator = self._parse_number() 2452 self._match(TokenType.ON) 2453 bucket_field = self._parse_field() 2454 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2455 percent = num 2456 elif self._match(TokenType.ROWS): 2457 rows = num 2458 else: 2459 size = num 2460 2461 self._match(TokenType.R_PAREN) 2462 2463 if self._match(TokenType.L_PAREN): 2464 method = self._parse_var() 2465 seed = self._match(TokenType.COMMA) and self._parse_number() 2466 self._match_r_paren() 2467 elif self._match_texts(("SEED", "REPEATABLE")): 2468 seed = self._parse_wrapped(self._parse_number) 2469 2470 return self.expression( 2471 exp.TableSample, 2472 method=method, 2473 bucket_numerator=bucket_numerator, 2474 bucket_denominator=bucket_denominator, 2475 bucket_field=bucket_field, 2476 percent=percent, 2477 rows=rows, 2478 size=size, 2479 seed=seed, 2480 kind=kind, 2481 ) 2482 2483 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2484 return list(iter(self._parse_pivot, None)) 2485 2486 # https://duckdb.org/docs/sql/statements/pivot 2487 def _parse_simplified_pivot(self) -> exp.Pivot: 2488 def _parse_on() -> t.Optional[exp.Expression]: 2489 this = self._parse_bitwise() 2490 return self._parse_in(this) if self._match(TokenType.IN) 
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT clause, or return ``None`` (after rewinding) if absent."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        # Without an opening paren this wasn't a pivot after all: backtrack.
        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        # UNPIVOT lists plain columns; PIVOT lists (aliased) aggregate calls.
        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may carry a table alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names; prefix vs. suffix placement and
            # quoted-vs-raw field naming are dialect-dependent class settings.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2615 2616 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2617 if self._match(TokenType.L_PAREN): 2618 grouping_set = self._parse_csv(self._parse_column) 2619 self._match_r_paren() 2620 return self.expression(exp.Tuple, expressions=grouping_set) 2621 2622 return self._parse_column() 2623 2624 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2625 if not skip_having_token and not self._match(TokenType.HAVING): 2626 return None 2627 return self.expression(exp.Having, this=self._parse_conjunction()) 2628 2629 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2630 if not self._match(TokenType.QUALIFY): 2631 return None 2632 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2633 2634 def _parse_order( 2635 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2636 ) -> t.Optional[exp.Expression]: 2637 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2638 return this 2639 2640 return self.expression( 2641 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2642 ) 2643 2644 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2645 if not self._match(token): 2646 return None 2647 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2648 2649 def _parse_ordered(self) -> exp.Ordered: 2650 this = self._parse_conjunction() 2651 self._match(TokenType.ASC) 2652 2653 is_desc = self._match(TokenType.DESC) 2654 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2655 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2656 desc = is_desc or False 2657 asc = not desc 2658 nulls_first = is_nulls_first or False 2659 explicitly_null_ordered = is_nulls_first or is_nulls_last 2660 2661 if ( 2662 not explicitly_null_ordered 2663 and ( 2664 (asc and self.NULL_ORDERING == "nulls_are_small") 2665 or (desc and self.NULL_ORDERING != "nulls_are_small") 2666 ) 2667 and self.NULL_ORDERING != "nulls_are_last" 
2668 ): 2669 nulls_first = True 2670 2671 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2672 2673 def _parse_limit( 2674 self, this: t.Optional[exp.Expression] = None, top: bool = False 2675 ) -> t.Optional[exp.Expression]: 2676 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2677 limit_paren = self._match(TokenType.L_PAREN) 2678 expression = self._parse_number() if top else self._parse_term() 2679 2680 if self._match(TokenType.COMMA): 2681 offset = expression 2682 expression = self._parse_term() 2683 else: 2684 offset = None 2685 2686 limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset) 2687 2688 if limit_paren: 2689 self._match_r_paren() 2690 2691 return limit_exp 2692 2693 if self._match(TokenType.FETCH): 2694 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2695 direction = self._prev.text if direction else "FIRST" 2696 2697 count = self._parse_number() 2698 percent = self._match(TokenType.PERCENT) 2699 2700 self._match_set((TokenType.ROW, TokenType.ROWS)) 2701 2702 only = self._match_text_seq("ONLY") 2703 with_ties = self._match_text_seq("WITH", "TIES") 2704 2705 if only and with_ties: 2706 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2707 2708 return self.expression( 2709 exp.Fetch, 2710 direction=direction, 2711 count=count, 2712 percent=percent, 2713 with_ties=with_ties, 2714 ) 2715 2716 return this 2717 2718 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2719 if not self._match(TokenType.OFFSET): 2720 return this 2721 2722 count = self._parse_number() 2723 self._match_set((TokenType.ROW, TokenType.ROWS)) 2724 return self.expression(exp.Offset, this=this, expression=count) 2725 2726 def _parse_locks(self) -> t.List[exp.Lock]: 2727 locks = [] 2728 while True: 2729 if self._match_text_seq("FOR", "UPDATE"): 2730 update = True 2731 elif self._match_text_seq("FOR", "SHARE") or 
self._match_text_seq( 2732 "LOCK", "IN", "SHARE", "MODE" 2733 ): 2734 update = False 2735 else: 2736 break 2737 2738 expressions = None 2739 if self._match_text_seq("OF"): 2740 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2741 2742 wait: t.Optional[bool | exp.Expression] = None 2743 if self._match_text_seq("NOWAIT"): 2744 wait = True 2745 elif self._match_text_seq("WAIT"): 2746 wait = self._parse_primary() 2747 elif self._match_text_seq("SKIP", "LOCKED"): 2748 wait = False 2749 2750 locks.append( 2751 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2752 ) 2753 2754 return locks 2755 2756 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2757 if not self._match_set(self.SET_OPERATIONS): 2758 return this 2759 2760 token_type = self._prev.token_type 2761 2762 if token_type == TokenType.UNION: 2763 expression = exp.Union 2764 elif token_type == TokenType.EXCEPT: 2765 expression = exp.Except 2766 else: 2767 expression = exp.Intersect 2768 2769 return self.expression( 2770 expression, 2771 this=this, 2772 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2773 expression=self._parse_set_operations(self._parse_select(nested=True)), 2774 ) 2775 2776 def _parse_expression(self) -> t.Optional[exp.Expression]: 2777 return self._parse_alias(self._parse_conjunction()) 2778 2779 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2780 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2781 2782 def _parse_equality(self) -> t.Optional[exp.Expression]: 2783 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2784 2785 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2786 return self._parse_tokens(self._parse_range, self.COMPARISON) 2787 2788 def _parse_range(self) -> t.Optional[exp.Expression]: 2789 this = self._parse_bitwise() 2790 negate = self._match(TokenType.NOT) 2791 2792 if 
self._match_set(self.RANGE_PARSERS): 2793 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2794 if not expression: 2795 return this 2796 2797 this = expression 2798 elif self._match(TokenType.ISNULL): 2799 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2800 2801 # Postgres supports ISNULL and NOTNULL for conditions. 2802 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2803 if self._match(TokenType.NOTNULL): 2804 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2805 this = self.expression(exp.Not, this=this) 2806 2807 if negate: 2808 this = self.expression(exp.Not, this=this) 2809 2810 if self._match(TokenType.IS): 2811 this = self._parse_is(this) 2812 2813 return this 2814 2815 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2816 index = self._index - 1 2817 negate = self._match(TokenType.NOT) 2818 2819 if self._match_text_seq("DISTINCT", "FROM"): 2820 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2821 return self.expression(klass, this=this, expression=self._parse_expression()) 2822 2823 expression = self._parse_null() or self._parse_boolean() 2824 if not expression: 2825 self._retreat(index) 2826 return None 2827 2828 this = self.expression(exp.Is, this=this, expression=expression) 2829 return self.expression(exp.Not, this=this) if negate else this 2830 2831 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2832 unnest = self._parse_unnest(with_alias=False) 2833 if unnest: 2834 this = self.expression(exp.In, this=this, unnest=unnest) 2835 elif self._match(TokenType.L_PAREN): 2836 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2837 2838 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2839 this = self.expression(exp.In, this=this, query=expressions[0]) 2840 else: 2841 this = self.expression(exp.In, this=this, expressions=expressions) 2842 2843 
    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, or return ``None`` if absent."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            # e.g. INTERVAL '5 day': the string already holds both value and unit.
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # Split the literal into canonical value + unit parts.
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast-like "TYPE literal" form, or fall back to a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index  # remember position in case the type guess was wrong
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01': a type followed by a literal is a cast,
                # possibly via a dialect-specific literal parser.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name not followed by a literal: treat it as an
                # identifier/column instead — rewind and reparse.
                self._retreat(index)
                return self._parse_column()
            # Parameterized type used as an expression (e.g. struct access).
            return self._parse_column_ops(data_type)

        return this
type_token in self.NESTED_TYPE_TOKENS 2964 is_struct = type_token == TokenType.STRUCT 2965 expressions = None 2966 maybe_func = False 2967 2968 if self._match(TokenType.L_PAREN): 2969 if is_struct: 2970 expressions = self._parse_csv(self._parse_struct_types) 2971 elif nested: 2972 expressions = self._parse_csv( 2973 lambda: self._parse_types(check_func=check_func, schema=schema) 2974 ) 2975 elif type_token in self.ENUM_TYPE_TOKENS: 2976 expressions = self._parse_csv(self._parse_primary) 2977 else: 2978 expressions = self._parse_csv(self._parse_type_size) 2979 2980 if not expressions or not self._match(TokenType.R_PAREN): 2981 self._retreat(index) 2982 return None 2983 2984 maybe_func = True 2985 2986 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2987 this = exp.DataType( 2988 this=exp.DataType.Type.ARRAY, 2989 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2990 nested=True, 2991 ) 2992 2993 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2994 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 2995 2996 return this 2997 2998 if self._match(TokenType.L_BRACKET): 2999 self._retreat(index) 3000 return None 3001 3002 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3003 if nested and self._match(TokenType.LT): 3004 if is_struct: 3005 expressions = self._parse_csv(self._parse_struct_types) 3006 else: 3007 expressions = self._parse_csv( 3008 lambda: self._parse_types(check_func=check_func, schema=schema) 3009 ) 3010 3011 if not self._match(TokenType.GT): 3012 self.raise_error("Expecting >") 3013 3014 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3015 values = self._parse_csv(self._parse_conjunction) 3016 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3017 3018 value: t.Optional[exp.Expression] = None 3019 if type_token in self.TIMESTAMPS: 3020 if self._match_text_seq("WITH", "TIME", "ZONE"): 3021 maybe_func = False 3022 value = 
exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: a type or identifier, an optional ':', then a column def."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an AtTimeZone node when the next tokens are AT TIME ZONE."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, then fold in any trailing column operators."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            # A bare identifier in column position is promoted to a Column node.
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Repeatedly apply COLUMN_OPERATORS (e.g. '::' casts, dots) and brackets to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast syntax: the right-hand side must parse as a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Another dotted part arrived: shift qualifiers one slot outward
                # (table -> db, db -> catalog) and make `field` the column name.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a leading-dot number (.5), or a
        parenthesized expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated into a single Concat node.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # `.5` style literal: re-attach the leading zero.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary literal, a function call, or an identifier/variable."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, returning None when the current tokens don't form one.

        Dispatches, in order: no-paren parser overrides, no-paren builtins
        (e.g. CURRENT_DATE), FUNCTION_PARSERS overrides, subquery predicates,
        and finally generic `name(args)` calls (known or Anonymous).
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Skip the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single function parameter as a column definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified UDF name with an optional parenthesized parameter list.

        NOTE(review): `kind` is accepted but unused in this body — presumably a hook
        for dialect overrides; confirm against subclasses.
        """
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Wrap a following literal in an Introducer; otherwise fall back to an Identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return 
self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (`(a, b) -> expr` or `a -> expr`) if present; otherwise retreat
        and parse a DISTINCT list or a plain select/expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as an ordinary argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            # Treat the left side of `x = ...` as a variable rather than a column.
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized list of constraints/column defs into a Schema node,
        or pass `this` through when a nested SELECT (or no paren) follows."""
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Speculative parse: drop any errors it produced and rewind.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `<name> [type] [constraints...]` into a ColumnDef, or return `this`
        unchanged when neither a type nor constraints follow."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with `(start, increment)` or
        `START ... INCREMENT ...` arguments."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, with either a wrapped list or one expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS [IDENTITY] with optional
        sequence options (START WITH / INCREMENT BY / MINVALUE / MAXVALUE / CYCLE)."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (expr) — the parenthesized generation expression.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the tail of a NOT constraint: NOT NULL or NOT CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via a leading CONSTRAINT keyword."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly named) table constraint; unnamed ones delegate to
        _parse_unnamed_constraint restricted to SCHEMA_UNNAMED_CONSTRAINTS."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the CONSTRAINT_PARSERS entry matching the next keyword, if any."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] with an optional column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint option strings (ON <event> <action>, DEFERRABLE, ...)
        until no more recognized options follow."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause: target table, optional column list, and options."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint with its REFERENCES clause and
        ON DELETE / ON UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY: a bare column constraint (with optional ASC/DESC), or a
        table-level key with a wrapped field list and options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def 
_parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3563 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3564 return this 3565 3566 bracket_kind = self._prev.token_type 3567 3568 if self._match(TokenType.COLON): 3569 expressions: t.List[t.Optional[exp.Expression]] = [ 3570 self.expression(exp.Slice, expression=self._parse_conjunction()) 3571 ] 3572 else: 3573 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3574 3575 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3576 if bracket_kind == TokenType.L_BRACE: 3577 this = self.expression(exp.Struct, expressions=expressions) 3578 elif not this or this.name.upper() == "ARRAY": 3579 this = self.expression(exp.Array, expressions=expressions) 3580 else: 3581 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3582 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3583 3584 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3585 self.raise_error("Expected ]") 3586 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3587 self.raise_error("Expected }") 3588 3589 self._add_comments(this) 3590 return self._parse_bracket(this) 3591 3592 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3593 if self._match(TokenType.COLON): 3594 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3595 return this 3596 3597 def _parse_case(self) -> t.Optional[exp.Expression]: 3598 ifs = [] 3599 default = None 3600 3601 expression = self._parse_conjunction() 3602 3603 while self._match(TokenType.WHEN): 3604 this = self._parse_conjunction() 3605 self._match(TokenType.THEN) 3606 then = self._parse_conjunction() 3607 ifs.append(self.expression(exp.If, this=this, true=then)) 3608 3609 if self._match(TokenType.ELSE): 3610 default = self._parse_conjunction() 3611 3612 if not 
self._match(TokenType.END): 3613 self.raise_error("Expected END after CASE", self._prev) 3614 3615 return self._parse_window( 3616 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3617 ) 3618 3619 def _parse_if(self) -> t.Optional[exp.Expression]: 3620 if self._match(TokenType.L_PAREN): 3621 args = self._parse_csv(self._parse_conjunction) 3622 this = self.validate_expression(exp.If.from_arg_list(args), args) 3623 self._match_r_paren() 3624 else: 3625 index = self._index - 1 3626 condition = self._parse_conjunction() 3627 3628 if not condition: 3629 self._retreat(index) 3630 return None 3631 3632 self._match(TokenType.THEN) 3633 true = self._parse_conjunction() 3634 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3635 self._match(TokenType.END) 3636 this = self.expression(exp.If, this=condition, true=true, false=false) 3637 3638 return self._parse_window(this) 3639 3640 def _parse_extract(self) -> exp.Extract: 3641 this = self._parse_function() or self._parse_var() or self._parse_type() 3642 3643 if self._match(TokenType.FROM): 3644 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3645 3646 if not self._match(TokenType.COMMA): 3647 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3648 3649 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3650 3651 def _parse_cast(self, strict: bool) -> exp.Expression: 3652 this = self._parse_conjunction() 3653 3654 if not self._match(TokenType.ALIAS): 3655 if self._match(TokenType.COMMA): 3656 return self.expression( 3657 exp.CastToStrType, this=this, expression=self._parse_string() 3658 ) 3659 else: 3660 self.raise_error("Expected AS after CAST") 3661 3662 to = self._parse_types() 3663 3664 if not to: 3665 self.raise_error("Expected TYPE after CAST") 3666 elif to.this == exp.DataType.Type.CHAR: 3667 if self._match(TokenType.CHARACTER_SET): 3668 to = self.expression(exp.CharacterSet, 
this=self._parse_var_or_string()) 3669 elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT): 3670 fmt = self._parse_string() 3671 3672 return self.expression( 3673 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3674 this=this, 3675 format=exp.Literal.string( 3676 format_time( 3677 fmt.this if fmt else "", 3678 self.FORMAT_MAPPING or self.TIME_MAPPING, 3679 self.FORMAT_TRIE or self.TIME_TRIE, 3680 ) 3681 ), 3682 ) 3683 3684 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3685 3686 def _parse_concat(self) -> t.Optional[exp.Expression]: 3687 args = self._parse_csv(self._parse_conjunction) 3688 if self.CONCAT_NULL_OUTPUTS_STRING: 3689 args = [ 3690 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3691 for arg in args 3692 if arg 3693 ] 3694 3695 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3696 # we find such a call we replace it with its argument. 3697 if len(args) == 1: 3698 return args[0] 3699 3700 return self.expression( 3701 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3702 ) 3703 3704 def _parse_string_agg(self) -> exp.Expression: 3705 expression: t.Optional[exp.Expression] 3706 3707 if self._match(TokenType.DISTINCT): 3708 args = self._parse_csv(self._parse_conjunction) 3709 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3710 else: 3711 args = self._parse_csv(self._parse_conjunction) 3712 expression = seq_get(args, 0) 3713 3714 index = self._index 3715 if not self._match(TokenType.R_PAREN): 3716 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3717 order = self._parse_order(this=expression) 3718 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3719 3720 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 
3721 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3722 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3723 if not self._match_text_seq("WITHIN", "GROUP"): 3724 self._retreat(index) 3725 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3726 3727 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3728 order = self._parse_order(this=expression) 3729 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3730 3731 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3732 to: t.Optional[exp.Expression] 3733 this = self._parse_bitwise() 3734 3735 if self._match(TokenType.USING): 3736 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3737 elif self._match(TokenType.COMMA): 3738 to = self._parse_bitwise() 3739 else: 3740 to = None 3741 3742 # Swap the argument order if needed to produce the correct AST 3743 if self.CONVERT_TYPE_FIRST: 3744 this, to = to, this 3745 3746 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3747 3748 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3749 """ 3750 There are generally two variants of the DECODE function: 3751 3752 - DECODE(bin, charset) 3753 - DECODE(expression, search, result [, search, result] ... [, default]) 3754 3755 The second variant will always be parsed into a CASE expression. Note that NULL 3756 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3757 instead of relying on pattern matching. 
3758 """ 3759 args = self._parse_csv(self._parse_conjunction) 3760 3761 if len(args) < 3: 3762 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3763 3764 expression, *expressions = args 3765 if not expression: 3766 return None 3767 3768 ifs = [] 3769 for search, result in zip(expressions[::2], expressions[1::2]): 3770 if not search or not result: 3771 return None 3772 3773 if isinstance(search, exp.Literal): 3774 ifs.append( 3775 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3776 ) 3777 elif isinstance(search, exp.Null): 3778 ifs.append( 3779 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3780 ) 3781 else: 3782 cond = exp.or_( 3783 exp.EQ(this=expression.copy(), expression=search), 3784 exp.and_( 3785 exp.Is(this=expression.copy(), expression=exp.Null()), 3786 exp.Is(this=search.copy(), expression=exp.Null()), 3787 copy=False, 3788 ), 3789 copy=False, 3790 ) 3791 ifs.append(exp.If(this=cond, true=result)) 3792 3793 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3794 3795 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3796 self._match_text_seq("KEY") 3797 key = self._parse_field() 3798 self._match(TokenType.COLON) 3799 self._match_text_seq("VALUE") 3800 value = self._parse_field() 3801 3802 if not key and not value: 3803 return None 3804 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3805 3806 def _parse_json_object(self) -> exp.JSONObject: 3807 star = self._parse_star() 3808 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3809 3810 null_handling = None 3811 if self._match_text_seq("NULL", "ON", "NULL"): 3812 null_handling = "NULL ON NULL" 3813 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3814 null_handling = "ABSENT ON NULL" 3815 3816 unique_keys = None 3817 if self._match_text_seq("WITH", "UNIQUE"): 3818 unique_keys = True 3819 elif 
self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG arguments, honoring dialect argument order and LN defaults."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH (cols) AGAINST (expr [modifier]) — MySQL full-text search."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause column: name, type, optional path, optional AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE arguments in either `needle IN haystack` or
        comma-separated form; `haystack_first` flips the comma-form argument order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join-hint pseudo-function's table arguments into a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): the first expression was the trim characters.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause's comma-separated named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls / RespectNulls when those keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse everything that may follow a function call: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS, and an OVER window (or a named window when `alias`)."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (a reference to a named window).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / expr, plus its side keyword."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if 
self._match(TokenType.L_PAREN): 4074 aliases = self.expression( 4075 exp.Aliases, 4076 this=this, 4077 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4078 ) 4079 self._match_r_paren(aliases) 4080 return aliases 4081 4082 alias = self._parse_id_var(any_token) 4083 4084 if alias: 4085 return self.expression(exp.Alias, this=this, alias=alias) 4086 4087 return this 4088 4089 def _parse_id_var( 4090 self, 4091 any_token: bool = True, 4092 tokens: t.Optional[t.Collection[TokenType]] = None, 4093 ) -> t.Optional[exp.Expression]: 4094 identifier = self._parse_identifier() 4095 4096 if identifier: 4097 return identifier 4098 4099 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4100 quoted = self._prev.token_type == TokenType.STRING 4101 return exp.Identifier(this=self._prev.text, quoted=quoted) 4102 4103 return None 4104 4105 def _parse_string(self) -> t.Optional[exp.Expression]: 4106 if self._match(TokenType.STRING): 4107 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4108 return self._parse_placeholder() 4109 4110 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4111 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4112 4113 def _parse_number(self) -> t.Optional[exp.Expression]: 4114 if self._match(TokenType.NUMBER): 4115 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4116 return self._parse_placeholder() 4117 4118 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4119 if self._match(TokenType.IDENTIFIER): 4120 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4121 return self._parse_placeholder() 4122 4123 def _parse_var( 4124 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4125 ) -> t.Optional[exp.Expression]: 4126 if ( 4127 (any_token and self._advance_any()) 4128 or self._match(TokenType.VAR) 4129 or (self._match_set(tokens) if tokens else False) 
4130 ): 4131 return self.expression(exp.Var, this=self._prev.text) 4132 return self._parse_placeholder() 4133 4134 def _advance_any(self) -> t.Optional[Token]: 4135 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4136 self._advance() 4137 return self._prev 4138 return None 4139 4140 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4141 return self._parse_var() or self._parse_string() 4142 4143 def _parse_null(self) -> t.Optional[exp.Expression]: 4144 if self._match(TokenType.NULL): 4145 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4146 return None 4147 4148 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4149 if self._match(TokenType.TRUE): 4150 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4151 if self._match(TokenType.FALSE): 4152 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4153 return None 4154 4155 def _parse_star(self) -> t.Optional[exp.Expression]: 4156 if self._match(TokenType.STAR): 4157 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4158 return None 4159 4160 def _parse_parameter(self) -> exp.Parameter: 4161 wrapped = self._match(TokenType.L_BRACE) 4162 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4163 self._match(TokenType.R_BRACE) 4164 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4165 4166 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4167 if self._match_set(self.PLACEHOLDER_PARSERS): 4168 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4169 if placeholder: 4170 return placeholder 4171 self._advance(-1) 4172 return None 4173 4174 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4175 if not self._match(TokenType.EXCEPT): 4176 return None 4177 if self._match(TokenType.L_PAREN, advance=False): 4178 return self._parse_wrapped_csv(self._parse_column) 4179 return self._parse_csv(self._parse_column) 4180 4181 def _parse_replace(self) -> 
t.Optional[t.List[t.Optional[exp.Expression]]]: 4182 if not self._match(TokenType.REPLACE): 4183 return None 4184 if self._match(TokenType.L_PAREN, advance=False): 4185 return self._parse_wrapped_csv(self._parse_expression) 4186 return self._parse_csv(self._parse_expression) 4187 4188 def _parse_csv( 4189 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4190 ) -> t.List[t.Optional[exp.Expression]]: 4191 parse_result = parse_method() 4192 items = [parse_result] if parse_result is not None else [] 4193 4194 while self._match(sep): 4195 self._add_comments(parse_result) 4196 parse_result = parse_method() 4197 if parse_result is not None: 4198 items.append(parse_result) 4199 4200 return items 4201 4202 def _parse_tokens( 4203 self, parse_method: t.Callable, expressions: t.Dict 4204 ) -> t.Optional[exp.Expression]: 4205 this = parse_method() 4206 4207 while self._match_set(expressions): 4208 this = self.expression( 4209 expressions[self._prev.token_type], 4210 this=this, 4211 comments=self._prev_comments, 4212 expression=parse_method(), 4213 ) 4214 4215 return this 4216 4217 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4218 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4219 4220 def _parse_wrapped_csv( 4221 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4222 ) -> t.List[t.Optional[exp.Expression]]: 4223 return self._parse_wrapped( 4224 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4225 ) 4226 4227 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4228 wrapped = self._match(TokenType.L_PAREN) 4229 if not wrapped and not optional: 4230 self.raise_error("Expecting (") 4231 parse_result = parse_method() 4232 if wrapped: 4233 self._match_r_paren() 4234 return parse_result 4235 4236 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4237 return 
self._parse_select() or self._parse_set_operations( 4238 self._parse_expression() if alias else self._parse_conjunction() 4239 ) 4240 4241 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4242 return self._parse_query_modifiers( 4243 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4244 ) 4245 4246 def _parse_transaction(self) -> exp.Transaction: 4247 this = None 4248 if self._match_texts(self.TRANSACTION_KIND): 4249 this = self._prev.text 4250 4251 self._match_texts({"TRANSACTION", "WORK"}) 4252 4253 modes = [] 4254 while True: 4255 mode = [] 4256 while self._match(TokenType.VAR): 4257 mode.append(self._prev.text) 4258 4259 if mode: 4260 modes.append(" ".join(mode)) 4261 if not self._match(TokenType.COMMA): 4262 break 4263 4264 return self.expression(exp.Transaction, this=this, modes=modes) 4265 4266 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4267 chain = None 4268 savepoint = None 4269 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4270 4271 self._match_texts({"TRANSACTION", "WORK"}) 4272 4273 if self._match_text_seq("TO"): 4274 self._match_text_seq("SAVEPOINT") 4275 savepoint = self._parse_id_var() 4276 4277 if self._match(TokenType.AND): 4278 chain = not self._match_text_seq("NO") 4279 self._match_text_seq("CHAIN") 4280 4281 if is_rollback: 4282 return self.expression(exp.Rollback, savepoint=savepoint) 4283 4284 return self.expression(exp.Commit, chain=chain) 4285 4286 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4287 if not self._match_text_seq("ADD"): 4288 return None 4289 4290 self._match(TokenType.COLUMN) 4291 exists_column = self._parse_exists(not_=True) 4292 expression = self._parse_column_def(self._parse_field(any_token=True)) 4293 4294 if expression: 4295 expression.set("exists", exists_column) 4296 4297 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4298 if self._match_texts(("FIRST", "AFTER")): 4299 position 
= self._prev.text 4300 column_position = self.expression( 4301 exp.ColumnPosition, this=self._parse_column(), position=position 4302 ) 4303 expression.set("position", column_position) 4304 4305 return expression 4306 4307 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4308 drop = self._match(TokenType.DROP) and self._parse_drop() 4309 if drop and not isinstance(drop, exp.Command): 4310 drop.set("kind", drop.args.get("kind", "COLUMN")) 4311 return drop 4312 4313 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4314 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4315 return self.expression( 4316 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4317 ) 4318 4319 def _parse_add_constraint(self) -> exp.AddConstraint: 4320 this = None 4321 kind = self._prev.token_type 4322 4323 if kind == TokenType.CONSTRAINT: 4324 this = self._parse_id_var() 4325 4326 if self._match_text_seq("CHECK"): 4327 expression = self._parse_wrapped(self._parse_conjunction) 4328 enforced = self._match_text_seq("ENFORCED") 4329 4330 return self.expression( 4331 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4332 ) 4333 4334 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4335 expression = self._parse_foreign_key() 4336 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4337 expression = self._parse_primary_key() 4338 else: 4339 expression = None 4340 4341 return self.expression(exp.AddConstraint, this=this, expression=expression) 4342 4343 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4344 index = self._index - 1 4345 4346 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4347 return self._parse_csv(self._parse_add_constraint) 4348 4349 self._retreat(index) 4350 return self._parse_csv(self._parse_add_column) 4351 4352 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4353 
self._match(TokenType.COLUMN) 4354 column = self._parse_field(any_token=True) 4355 4356 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4357 return self.expression(exp.AlterColumn, this=column, drop=True) 4358 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4359 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4360 4361 self._match_text_seq("SET", "DATA") 4362 return self.expression( 4363 exp.AlterColumn, 4364 this=column, 4365 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4366 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4367 using=self._match(TokenType.USING) and self._parse_conjunction(), 4368 ) 4369 4370 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4371 index = self._index - 1 4372 4373 partition_exists = self._parse_exists() 4374 if self._match(TokenType.PARTITION, advance=False): 4375 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4376 4377 self._retreat(index) 4378 return self._parse_csv(self._parse_drop_column) 4379 4380 def _parse_alter_table_rename(self) -> exp.RenameTable: 4381 self._match_text_seq("TO") 4382 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4383 4384 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4385 start = self._prev 4386 4387 if not self._match(TokenType.TABLE): 4388 return self._parse_as_command(start) 4389 4390 exists = self._parse_exists() 4391 this = self._parse_table(schema=True) 4392 4393 if self._next: 4394 self._advance() 4395 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4396 4397 if parser: 4398 actions = ensure_list(parser(self)) 4399 4400 if not self._curr: 4401 return self.expression( 4402 exp.AlterTable, 4403 this=this, 4404 exists=exists, 4405 actions=actions, 4406 ) 4407 return self._parse_as_command(start) 4408 4409 def _parse_merge(self) -> exp.Merge: 4410 self._match(TokenType.INTO) 4411 target = 
self._parse_table() 4412 4413 self._match(TokenType.USING) 4414 using = self._parse_table() 4415 4416 self._match(TokenType.ON) 4417 on = self._parse_conjunction() 4418 4419 whens = [] 4420 while self._match(TokenType.WHEN): 4421 matched = not self._match(TokenType.NOT) 4422 self._match_text_seq("MATCHED") 4423 source = ( 4424 False 4425 if self._match_text_seq("BY", "TARGET") 4426 else self._match_text_seq("BY", "SOURCE") 4427 ) 4428 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4429 4430 self._match(TokenType.THEN) 4431 4432 if self._match(TokenType.INSERT): 4433 _this = self._parse_star() 4434 if _this: 4435 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4436 else: 4437 then = self.expression( 4438 exp.Insert, 4439 this=self._parse_value(), 4440 expression=self._match(TokenType.VALUES) and self._parse_value(), 4441 ) 4442 elif self._match(TokenType.UPDATE): 4443 expressions = self._parse_star() 4444 if expressions: 4445 then = self.expression(exp.Update, expressions=expressions) 4446 else: 4447 then = self.expression( 4448 exp.Update, 4449 expressions=self._match(TokenType.SET) 4450 and self._parse_csv(self._parse_equality), 4451 ) 4452 elif self._match(TokenType.DELETE): 4453 then = self.expression(exp.Var, this=self._prev.text) 4454 else: 4455 then = None 4456 4457 whens.append( 4458 self.expression( 4459 exp.When, 4460 matched=matched, 4461 source=source, 4462 condition=condition, 4463 then=then, 4464 ) 4465 ) 4466 4467 return self.expression( 4468 exp.Merge, 4469 this=target, 4470 using=using, 4471 on=on, 4472 expressions=whens, 4473 ) 4474 4475 def _parse_show(self) -> t.Optional[exp.Expression]: 4476 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4477 if parser: 4478 return parser(self) 4479 self._advance() 4480 return self.expression(exp.Show, this=self._prev.text.upper()) 4481 4482 def _parse_set_item_assignment( 4483 self, kind: t.Optional[str] = None 4484 ) -> 
t.Optional[exp.Expression]: 4485 index = self._index 4486 4487 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4488 return self._parse_set_transaction(global_=kind == "GLOBAL") 4489 4490 left = self._parse_primary() or self._parse_id_var() 4491 4492 if not self._match_texts(("=", "TO")): 4493 self._retreat(index) 4494 return None 4495 4496 right = self._parse_statement() or self._parse_id_var() 4497 this = self.expression(exp.EQ, this=left, expression=right) 4498 4499 return self.expression(exp.SetItem, this=this, kind=kind) 4500 4501 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4502 self._match_text_seq("TRANSACTION") 4503 characteristics = self._parse_csv( 4504 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4505 ) 4506 return self.expression( 4507 exp.SetItem, 4508 expressions=characteristics, 4509 kind="TRANSACTION", 4510 **{"global": global_}, # type: ignore 4511 ) 4512 4513 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4514 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4515 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4516 4517 def _parse_set(self) -> exp.Set | exp.Command: 4518 index = self._index 4519 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4520 4521 if self._curr: 4522 self._retreat(index) 4523 return self._parse_as_command(self._prev) 4524 4525 return set_ 4526 4527 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4528 for option in options: 4529 if self._match_text_seq(*option.split(" ")): 4530 return exp.var(option) 4531 return None 4532 4533 def _parse_as_command(self, start: Token) -> exp.Command: 4534 while self._curr: 4535 self._advance() 4536 text = self._find_sql(start, self._prev) 4537 size = len(start.text) 4538 return exp.Command(this=text[:size], expression=text[size:]) 4539 4540 def _parse_dict_property(self, this: str) -> 
exp.DictProperty: 4541 settings = [] 4542 4543 self._match_l_paren() 4544 kind = self._parse_id_var() 4545 4546 if self._match(TokenType.L_PAREN): 4547 while True: 4548 key = self._parse_id_var() 4549 value = self._parse_primary() 4550 4551 if not key and value is None: 4552 break 4553 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4554 self._match(TokenType.R_PAREN) 4555 4556 self._match_r_paren() 4557 4558 return self.expression( 4559 exp.DictProperty, 4560 this=this, 4561 kind=kind.this if kind else None, 4562 settings=settings, 4563 ) 4564 4565 def _parse_dict_range(self, this: str) -> exp.DictRange: 4566 self._match_l_paren() 4567 has_min = self._match_text_seq("MIN") 4568 if has_min: 4569 min = self._parse_var() or self._parse_primary() 4570 self._match_text_seq("MAX") 4571 max = self._parse_var() or self._parse_primary() 4572 else: 4573 max = self._parse_var() or self._parse_primary() 4574 min = exp.Literal.number(0) 4575 self._match_r_paren() 4576 return self.expression(exp.DictRange, this=this, min=min, max=max) 4577 4578 def _find_parser( 4579 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4580 ) -> t.Optional[t.Callable]: 4581 if not self._curr: 4582 return None 4583 4584 index = self._index 4585 this = [] 4586 while True: 4587 # The current token might be multiple words 4588 curr = self._curr.text.upper() 4589 key = curr.split(" ") 4590 this.append(curr) 4591 4592 self._advance() 4593 result, trie = in_trie(trie, key) 4594 if result == TrieResult.FAILED: 4595 break 4596 4597 if result == TrieResult.EXISTS: 4598 subparser = parsers[" ".join(this)] 4599 return subparser 4600 4601 self._retreat(index) 4602 return None 4603 4604 def _match(self, token_type, advance=True, expression=None): 4605 if not self._curr: 4606 return None 4607 4608 if self._curr.token_type == token_type: 4609 if advance: 4610 self._advance() 4611 self._add_comments(expression) 4612 return True 4613 4614 return None 4615 4616 def _match_set(self, 
types, advance=True): 4617 if not self._curr: 4618 return None 4619 4620 if self._curr.token_type in types: 4621 if advance: 4622 self._advance() 4623 return True 4624 4625 return None 4626 4627 def _match_pair(self, token_type_a, token_type_b, advance=True): 4628 if not self._curr or not self._next: 4629 return None 4630 4631 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4632 if advance: 4633 self._advance(2) 4634 return True 4635 4636 return None 4637 4638 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4639 if not self._match(TokenType.L_PAREN, expression=expression): 4640 self.raise_error("Expecting (") 4641 4642 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4643 if not self._match(TokenType.R_PAREN, expression=expression): 4644 self.raise_error("Expecting )") 4645 4646 def _match_texts(self, texts, advance=True): 4647 if self._curr and self._curr.text.upper() in texts: 4648 if advance: 4649 self._advance() 4650 return True 4651 return False 4652 4653 def _match_text_seq(self, *texts, advance=True): 4654 index = self._index 4655 for text in texts: 4656 if self._curr and self._curr.text.upper() == text: 4657 self._advance() 4658 else: 4659 self._retreat(index) 4660 return False 4661 4662 if not advance: 4663 self._retreat(index) 4664 4665 return True 4666 4667 @t.overload 4668 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4669 ... 4670 4671 @t.overload 4672 def _replace_columns_with_dots( 4673 self, this: t.Optional[exp.Expression] 4674 ) -> t.Optional[exp.Expression]: 4675 ... 
4676 4677 def _replace_columns_with_dots(self, this): 4678 if isinstance(this, exp.Dot): 4679 exp.replace_children(this, self._replace_columns_with_dots) 4680 elif isinstance(this, exp.Column): 4681 exp.replace_children(this, self._replace_columns_with_dots) 4682 table = this.args.get("table") 4683 this = ( 4684 self.expression(exp.Dot, this=table, expression=this.this) 4685 if table 4686 else self.expression(exp.Var, this=this.name) 4687 ) 4688 elif isinstance(this, exp.Identifier): 4689 this = self.expression(exp.Var, this=this.name) 4690 4691 return this 4692 4693 def _replace_lambda( 4694 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4695 ) -> t.Optional[exp.Expression]: 4696 if not node: 4697 return node 4698 4699 for column in node.find_all(exp.Column): 4700 if column.parts[0].name in lambda_variables: 4701 dot_or_id = column.to_dot() if column.table else column.this 4702 parent = column.parent 4703 4704 while isinstance(parent, exp.Dot): 4705 if not isinstance(parent.parent, exp.Dot): 4706 parent.replace(dot_or_id) 4707 break 4708 parent = parent.parent 4709 else: 4710 if column is node: 4711 node = dot_or_id 4712 else: 4713 column.replace(dot_or_id) 4714 return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP from a flat [key1, value1, key2, value2, ...] argument list.

    A single star argument yields a StarMap instead of a VarMap.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Pair up alternating keys and values; `args[i + 1]` deliberately raises
    # IndexError on an odd-length list, same as the element-wise append version.
    pairs = [(args[i], args[i + 1]) for i in range(0, len(args), 2)]

    return exp.VarMap(
        keys=exp.Array(expressions=[key for key, _ in pairs]),
        values=exp.Array(expressions=[value for _, value in pairs]),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 
TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.ENUM, 190 *NESTED_TYPE_TOKENS, 191 } 192 193 SUBQUERY_PREDICATES = { 194 TokenType.ANY: exp.Any, 195 TokenType.ALL: exp.All, 196 TokenType.EXISTS: exp.Exists, 197 TokenType.SOME: exp.Any, 198 } 199 200 RESERVED_KEYWORDS = { 201 *Tokenizer.SINGLE_TOKENS.values(), 202 TokenType.SELECT, 203 } 204 205 DB_CREATABLES = { 206 TokenType.DATABASE, 207 TokenType.SCHEMA, 208 TokenType.TABLE, 209 TokenType.VIEW, 210 TokenType.DICTIONARY, 211 } 212 213 CREATABLES = { 214 TokenType.COLUMN, 215 
TokenType.FUNCTION, 216 TokenType.INDEX, 217 TokenType.PROCEDURE, 218 *DB_CREATABLES, 219 } 220 221 # Tokens that can represent identifiers 222 ID_VAR_TOKENS = { 223 TokenType.VAR, 224 TokenType.ANTI, 225 TokenType.APPLY, 226 TokenType.ASC, 227 TokenType.AUTO_INCREMENT, 228 TokenType.BEGIN, 229 TokenType.CACHE, 230 TokenType.CASE, 231 TokenType.COLLATE, 232 TokenType.COMMAND, 233 TokenType.COMMENT, 234 TokenType.COMMIT, 235 TokenType.CONSTRAINT, 236 TokenType.DEFAULT, 237 TokenType.DELETE, 238 TokenType.DESC, 239 TokenType.DESCRIBE, 240 TokenType.DICTIONARY, 241 TokenType.DIV, 242 TokenType.END, 243 TokenType.EXECUTE, 244 TokenType.ESCAPE, 245 TokenType.FALSE, 246 TokenType.FIRST, 247 TokenType.FILTER, 248 TokenType.FORMAT, 249 TokenType.FULL, 250 TokenType.IF, 251 TokenType.IS, 252 TokenType.ISNULL, 253 TokenType.INTERVAL, 254 TokenType.KEEP, 255 TokenType.LEFT, 256 TokenType.LOAD, 257 TokenType.MERGE, 258 TokenType.NATURAL, 259 TokenType.NEXT, 260 TokenType.OFFSET, 261 TokenType.ORDINALITY, 262 TokenType.OVERWRITE, 263 TokenType.PARTITION, 264 TokenType.PERCENT, 265 TokenType.PIVOT, 266 TokenType.PRAGMA, 267 TokenType.RANGE, 268 TokenType.REFERENCES, 269 TokenType.RIGHT, 270 TokenType.ROW, 271 TokenType.ROWS, 272 TokenType.SEMI, 273 TokenType.SET, 274 TokenType.SETTINGS, 275 TokenType.SHOW, 276 TokenType.TEMPORARY, 277 TokenType.TOP, 278 TokenType.TRUE, 279 TokenType.UNIQUE, 280 TokenType.UNPIVOT, 281 TokenType.UPDATE, 282 TokenType.VOLATILE, 283 TokenType.WINDOW, 284 *CREATABLES, 285 *SUBQUERY_PREDICATES, 286 *TYPE_TOKENS, 287 *NO_PAREN_FUNCTIONS, 288 } 289 290 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 291 292 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 293 TokenType.APPLY, 294 TokenType.ASOF, 295 TokenType.FULL, 296 TokenType.LEFT, 297 TokenType.LOCK, 298 TokenType.NATURAL, 299 TokenType.OFFSET, 300 TokenType.RIGHT, 301 TokenType.WINDOW, 302 } 303 304 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 305 306 UPDATE_ALIAS_TOKENS = 
TABLE_ALIAS_TOKENS - {TokenType.SET} 307 308 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 309 310 FUNC_TOKENS = { 311 TokenType.COMMAND, 312 TokenType.CURRENT_DATE, 313 TokenType.CURRENT_DATETIME, 314 TokenType.CURRENT_TIMESTAMP, 315 TokenType.CURRENT_TIME, 316 TokenType.CURRENT_USER, 317 TokenType.FILTER, 318 TokenType.FIRST, 319 TokenType.FORMAT, 320 TokenType.GLOB, 321 TokenType.IDENTIFIER, 322 TokenType.INDEX, 323 TokenType.ISNULL, 324 TokenType.ILIKE, 325 TokenType.LIKE, 326 TokenType.MERGE, 327 TokenType.OFFSET, 328 TokenType.PRIMARY_KEY, 329 TokenType.RANGE, 330 TokenType.REPLACE, 331 TokenType.ROW, 332 TokenType.UNNEST, 333 TokenType.VAR, 334 TokenType.LEFT, 335 TokenType.RIGHT, 336 TokenType.DATE, 337 TokenType.DATETIME, 338 TokenType.TABLE, 339 TokenType.TIMESTAMP, 340 TokenType.TIMESTAMPTZ, 341 TokenType.WINDOW, 342 *TYPE_TOKENS, 343 *SUBQUERY_PREDICATES, 344 } 345 346 CONJUNCTION = { 347 TokenType.AND: exp.And, 348 TokenType.OR: exp.Or, 349 } 350 351 EQUALITY = { 352 TokenType.EQ: exp.EQ, 353 TokenType.NEQ: exp.NEQ, 354 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 355 } 356 357 COMPARISON = { 358 TokenType.GT: exp.GT, 359 TokenType.GTE: exp.GTE, 360 TokenType.LT: exp.LT, 361 TokenType.LTE: exp.LTE, 362 } 363 364 BITWISE = { 365 TokenType.AMP: exp.BitwiseAnd, 366 TokenType.CARET: exp.BitwiseXor, 367 TokenType.PIPE: exp.BitwiseOr, 368 TokenType.DPIPE: exp.DPipe, 369 } 370 371 TERM = { 372 TokenType.DASH: exp.Sub, 373 TokenType.PLUS: exp.Add, 374 TokenType.MOD: exp.Mod, 375 TokenType.COLLATE: exp.Collate, 376 } 377 378 FACTOR = { 379 TokenType.DIV: exp.IntDiv, 380 TokenType.LR_ARROW: exp.Distance, 381 TokenType.SLASH: exp.Div, 382 TokenType.STAR: exp.Mul, 383 } 384 385 TIMESTAMPS = { 386 TokenType.TIME, 387 TokenType.TIMESTAMP, 388 TokenType.TIMESTAMPTZ, 389 TokenType.TIMESTAMPLTZ, 390 } 391 392 SET_OPERATIONS = { 393 TokenType.UNION, 394 TokenType.INTERSECT, 395 TokenType.EXCEPT, 396 } 397 398 JOIN_METHODS = { 399 TokenType.NATURAL, 400 TokenType.ASOF, 401 } 
    # Tokens that specify the side of a JOIN (LEFT/RIGHT/FULL).
    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    # Tokens that specify the kind of a JOIN (INNER/OUTER/CROSS/SEMI/ANTI).
    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Dialect-specific join hints; empty by default, dialects may override.
    JOIN_HINTS: t.Set[str] = set()

    # Parsers for lambda-like constructs, keyed by the arrow token that follows
    # the argument list (e.g. `x -> x + 1`, `x => ...`).
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Operators that can follow a column expression (casts, JSON extraction
    # arrows, ...). A value of None means the token is handled inline by the
    # column parser rather than through this table.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Maps target Expression types to the parser used by `parse_into`.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Maps a statement's leading token to the method that parses it.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix (unary) operator parsers.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Parsers for primary (literal-like) expressions; each receives the
    # matched token so it can use the token's text.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Parsers for bind parameters / placeholders (?, :name, @param, ...).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Parsers for range/predicate operators (BETWEEN, IN, IS, LIKE, ...).
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # Parsers for DDL properties, keyed by the upper-cased property keyword.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Parsers for column constraints, keyed by the constraint keyword.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # Parsers for ALTER TABLE actions.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema without an explicit name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that are not called with parentheses
    # (CASE, IF, ANY, NEXT VALUE FOR ...).
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    # Functions whose arguments may carry aliases (e.g. STRUCT(x AS a)).
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions needing bespoke argument parsing instead of a plain CSV list.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Parsers for query modifiers, keyed by the arg name they populate.
    # `iter(fn, None)` collects results until the parser returns None.
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # Parsers for SET statement scopes/kinds.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Dialect-specific SHOW statement parsers (empty by default).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Dialect-specific parsers for typed literals (empty by default).
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that can take query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Tokens that can start the SELECT part of a DDL statement.
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # Tokens that may immediately precede VOLATILE in a CREATE statement;
    # used to disambiguate the VOLATILE property (see _parse_volatile_property).
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords accepted in INSERT OR <alternative>.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Keywords accepted inside the parenthesized part of a CLONE clause.
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Whether CAST produces exp.Cast (strict) or exp.TryCast.
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    # Whether CONVERT takes the target type as its first argument.
    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether LOG(b, n) takes the base as its first argument.
    LOG_BASE_FIRST = True
    # Whether a single-argument LOG is treated as the natural logarithm.
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self) -> None:
        """Clears all parsing state so the instance can parse a new input."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested type.
            ParseError: If none of the requested types could be parsed.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Splits the token stream on semicolons and applies `parse_method` to
        each resulting statement chunk, collecting one tree per chunk.
        """
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # Only start a new chunk if this isn't a trailing semicolon.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The \033[4m / \033[0m codes underline the offending SQL fragment.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # If no explicit comments are given, attach any pending token comments.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Attaches buffered comments from the previous token to `expression`."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL spanned by the two tokens."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Moves the token cursor forward, updating _curr/_next/_prev views."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the token cursor back (or forward) to an absolute index."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Falls back to a raw Command node for unparsed statements."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT ON <kind> <target> IS <string> statements."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses the TO <table> property."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each TTL entry is an expression optionally followed by an action.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """
        Parses a single statement: dispatches via STATEMENT_PARSERS, falls back
        to a raw Command for known command tokens, otherwise tries an
        expression/SELECT with query modifiers.
        """
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        """Parses DROP statements; unknown targets fall back to a Command."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS; returns a truthy value iff it was present."""
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses CREATE statements; unparseable forms fall back to a Command."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION is matched below.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect the modifier keywords that may precede a property name;
        # only truthy ones are forwarded to the property parser.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser doesn't accept the matched modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single DDL property, or a generic key=value assignment."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED [AS] <format> / STORED AS INPUTFORMAT ... OUTPUTFORMAT ..."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        """Parses `<keyword> [=|AS] <value>` into the given property class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """
        Parses consecutive properties into an exp.Properties node, or returns
        None if no property could be parsed.
        """
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parses [NO] FALLBACK [PROTECTION]."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """
        Disambiguates VOLATILE: directly after CREATE/REPLACE/UNIQUE it is a
        table property, otherwise it is a function stability marker.
        """
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        """Parses the various WITH-prefixed properties (WITH (...), WITH DATA, ...)."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parses DEFINER = user@host."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # The host may be an identifier or the wildcard `%` (MOD token).
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parses WITH JOURNAL [TABLE] [=] <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parses the [NO] LOG property."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Parses the JOURNAL property, forwarding matched modifier keywords."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses CHECKSUM [=] ON|OFF|DEFAULT."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
        """Parses a CLUSTER BY expression list."""
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parses COPY GRANTS; backtracks if GRANTS doesn't follow COPY."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parses FREESPACE [=] <number> [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parses [NO|DEFAULT] MERGEBLOCKRATIO [= <number> [PERCENT]]."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE [= <number>] [BYTES|KBYTES|KILOBYTES] (Teradata).

        The default/minimum/maximum flags come from modifiers consumed earlier
        by _parse_property_before.
        """
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)] (Teradata)."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE] (Teradata)."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING property: object kind, optional object name,
        FOR/IN, the lock type, and an optional OVERRIDE (Teradata)."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locking has no object name to parse.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a PARTITION BY expression list; empty list when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY, either a column schema or a bracketed/plain field."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS] (Teradata)."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """Parse NO PRIMARY INDEX; None when that sequence doesn't follow."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse ON COMMIT PRESERVE|DELETE ROWS; None otherwise."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY(<column>) (Redshift)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse CREATE ... LIKE <table> [INCLUDING|EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                # Option keyword without a following identifier: abort the parse.
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY(<columns>) (Redshift)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse [DEFAULT] CHARACTER SET [=] <name>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a function RETURNS clause: a plain type, TABLE<...> (angle-bracket
        struct form), or TABLE (<schema>)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<creatable kind>] <table>."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement: INSERT [OVERWRITE] [OR <alt>]
        [INTO] [TABLE] <target> | [LOCAL] DIRECTORY <path>, plus the source
        select, ON CONFLICT and RETURNING clauses."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive INSERT ... DIRECTORY form.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. sqlite INSERT OR REPLACE / IGNORE / ...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... DO NOTHING|UPDATE (postgres) or
        ON DUPLICATE KEY ... (mysql); None when neither prefix matches."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING column list; None when RETURNING is absent."""
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the remainder of ROW FORMAT when ROW was already consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive ROW FORMAT clause: SERDE '<class>' or DELIMITED with
        its optional terminator/escape sub-clauses.

        Args:
            match_row: require and consume the leading ROW FORMAT pair.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive LOAD DATA [LOCAL] INPATH ...; fall back to a raw Command
        when DATA does not follow LOAD."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement (target table, USING, WHERE, RETURNING, LIMIT)."""
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement (target, SET assignments, FROM, WHERE, RETURNING, LIMIT)."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table> (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>] (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # A single ('key' = 'value') pair is collected as [key, value].
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse a PARTITION(<expr>, ...) clause; None when PARTITION is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse a single VALUES row as a Tuple, parenthesized or bare."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: an optional leading CTE, a SELECT body,
        a parenthesized subquery (when nested/table), or a VALUES clause; the
        result is threaded through set-operation parsing.

        Args:
            nested: allow a parenthesized nested select.
            table: parse a table instead of a select inside parentheses.
            parse_subquery_alias: whether a parenthesized subquery may take an alias.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # BigQuery SELECT AS STRUCT / AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                # FROM-first syntax: normalize to SELECT * FROM ...
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTE list; also tolerates
        a repeated WITH between CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(<column aliases>)]; None when neither an alias
        nor a column list is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Backtrack if the parentheses did not contain column aliases.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery, attaching any pivots and (optionally) an alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach query modifiers (joins, where, group, order, limit, ...) to a
        modifiable node, folding a LIMIT's embedded offset into its own arg."""
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))

                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment /*+ ... */ following SELECT."""
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, modifiers: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; when `modifiers` is set, also consume query
        modifiers on the parsed table (used by UPDATE ... FROM)."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE clause: PARTITION BY, ORDER BY, MEASURES,
        the rows-per-match mode, AFTER MATCH SKIP, the raw PATTERN text
        (captured verbatim between balanced parens), and DEFINE."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            # NOTE(review): the f-prefixes below are unnecessary (no placeholders).
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan tokens to the matching closing paren; the pattern is kept as raw SQL.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY and the joined relation."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume and return the (method, side, kind) tokens of a JOIN, each
        None when not present."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        """Parse a join: comma join, [method] [side] [kind] JOIN, or
        CROSS/OUTER APPLY, with optional ON / USING condition."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join: rewind and drop anything we consumed.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        When `index` is given, parse the trailing ON <table> part of
        CREATE INDEX; otherwise parse an inline [UNIQUE|PRIMARY|AMP] INDEX.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table hints: T-SQL WITH (...) or MySQL index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table reference (function, identifier,
        quoted name, or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly-qualified table name `[catalog.][db.]table`, nesting
        extra components beyond three into Dot expressions."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like relation: LATERAL, UNNEST, derived VALUES,
        a subquery, or a plain table reference with alias/pivots/hints/sample.

        Args:
            schema: parse the reference as a schema (column definitions allowed).
            alias_tokens: token types permitted as the alias.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect-dependent ordering: some dialects put TABLESAMPLE before the alias.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] x]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            # In these dialects the "table" alias actually names the single column.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES relation, either bare or wrapped as (VALUES ...)."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse a TABLESAMPLE clause (or DuckDB USING SAMPLE when `as_modifier`),
        covering bucket, percent, rows and size forms plus a SEED/REPEATABLE."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            # NOTE(review): redundant double assignment below (typo, harmless).
            bucket_denominator = bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
2474 bucket_numerator=bucket_numerator, 2475 bucket_denominator=bucket_denominator, 2476 bucket_field=bucket_field, 2477 percent=percent, 2478 rows=rows, 2479 size=size, 2480 seed=seed, 2481 kind=kind, 2482 ) 2483 2484 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2485 return list(iter(self._parse_pivot, None)) 2486 2487 # https://duckdb.org/docs/sql/statements/pivot 2488 def _parse_simplified_pivot(self) -> exp.Pivot: 2489 def _parse_on() -> t.Optional[exp.Expression]: 2490 this = self._parse_bitwise() 2491 return self._parse_in(this) if self._match(TokenType.IN) else this 2492 2493 this = self._parse_table() 2494 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2495 using = self._match(TokenType.USING) and self._parse_csv( 2496 lambda: self._parse_alias(self._parse_function()) 2497 ) 2498 group = self._parse_group() 2499 return self.expression( 2500 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2501 ) 2502 2503 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2504 index = self._index 2505 2506 if self._match(TokenType.PIVOT): 2507 unpivot = False 2508 elif self._match(TokenType.UNPIVOT): 2509 unpivot = True 2510 else: 2511 return None 2512 2513 expressions = [] 2514 field = None 2515 2516 if not self._match(TokenType.L_PAREN): 2517 self._retreat(index) 2518 return None 2519 2520 if unpivot: 2521 expressions = self._parse_csv(self._parse_column) 2522 else: 2523 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2524 2525 if not expressions: 2526 self.raise_error("Failed to parse PIVOT's aggregation list") 2527 2528 if not self._match(TokenType.FOR): 2529 self.raise_error("Expecting FOR") 2530 2531 value = self._parse_column() 2532 2533 if not self._match(TokenType.IN): 2534 self.raise_error("Expecting IN") 2535 2536 field = self._parse_in(value, alias=True) 2537 2538 self._match_r_paren() 2539 2540 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, 
unpivot=unpivot) 2541 2542 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2543 pivot.set("alias", self._parse_table_alias()) 2544 2545 if not unpivot: 2546 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2547 2548 columns: t.List[exp.Expression] = [] 2549 for fld in pivot.args["field"].expressions: 2550 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2551 for name in names: 2552 if self.PREFIXED_PIVOT_COLUMNS: 2553 name = f"{name}_{field_name}" if name else field_name 2554 else: 2555 name = f"{field_name}_{name}" if name else field_name 2556 2557 columns.append(exp.to_identifier(name)) 2558 2559 pivot.set("columns", columns) 2560 2561 return pivot 2562 2563 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2564 return [agg.alias for agg in aggregations] 2565 2566 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2567 if not skip_where_token and not self._match(TokenType.WHERE): 2568 return None 2569 2570 return self.expression( 2571 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2572 ) 2573 2574 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2575 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2576 return None 2577 2578 elements = defaultdict(list) 2579 2580 while True: 2581 expressions = self._parse_csv(self._parse_conjunction) 2582 if expressions: 2583 elements["expressions"].extend(expressions) 2584 2585 grouping_sets = self._parse_grouping_sets() 2586 if grouping_sets: 2587 elements["grouping_sets"].extend(grouping_sets) 2588 2589 rollup = None 2590 cube = None 2591 totals = None 2592 2593 with_ = self._match(TokenType.WITH) 2594 if self._match(TokenType.ROLLUP): 2595 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2596 elements["rollup"].extend(ensure_list(rollup)) 2597 2598 if self._match(TokenType.CUBE): 2599 
cube = with_ or self._parse_wrapped_csv(self._parse_column) 2600 elements["cube"].extend(ensure_list(cube)) 2601 2602 if self._match_text_seq("TOTALS"): 2603 totals = True 2604 elements["totals"] = True # type: ignore 2605 2606 if not (grouping_sets or rollup or cube or totals): 2607 break 2608 2609 return self.expression(exp.Group, **elements) # type: ignore 2610 2611 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2612 if not self._match(TokenType.GROUPING_SETS): 2613 return None 2614 2615 return self._parse_wrapped_csv(self._parse_grouping_set) 2616 2617 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2618 if self._match(TokenType.L_PAREN): 2619 grouping_set = self._parse_csv(self._parse_column) 2620 self._match_r_paren() 2621 return self.expression(exp.Tuple, expressions=grouping_set) 2622 2623 return self._parse_column() 2624 2625 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2626 if not skip_having_token and not self._match(TokenType.HAVING): 2627 return None 2628 return self.expression(exp.Having, this=self._parse_conjunction()) 2629 2630 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2631 if not self._match(TokenType.QUALIFY): 2632 return None 2633 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2634 2635 def _parse_order( 2636 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2637 ) -> t.Optional[exp.Expression]: 2638 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2639 return this 2640 2641 return self.expression( 2642 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2643 ) 2644 2645 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2646 if not self._match(token): 2647 return None 2648 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2649 2650 def _parse_ordered(self) -> exp.Ordered: 2651 this = 
self._parse_conjunction() 2652 self._match(TokenType.ASC) 2653 2654 is_desc = self._match(TokenType.DESC) 2655 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2656 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2657 desc = is_desc or False 2658 asc = not desc 2659 nulls_first = is_nulls_first or False 2660 explicitly_null_ordered = is_nulls_first or is_nulls_last 2661 2662 if ( 2663 not explicitly_null_ordered 2664 and ( 2665 (asc and self.NULL_ORDERING == "nulls_are_small") 2666 or (desc and self.NULL_ORDERING != "nulls_are_small") 2667 ) 2668 and self.NULL_ORDERING != "nulls_are_last" 2669 ): 2670 nulls_first = True 2671 2672 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2673 2674 def _parse_limit( 2675 self, this: t.Optional[exp.Expression] = None, top: bool = False 2676 ) -> t.Optional[exp.Expression]: 2677 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2678 limit_paren = self._match(TokenType.L_PAREN) 2679 expression = self._parse_number() if top else self._parse_term() 2680 2681 if self._match(TokenType.COMMA): 2682 offset = expression 2683 expression = self._parse_term() 2684 else: 2685 offset = None 2686 2687 limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset) 2688 2689 if limit_paren: 2690 self._match_r_paren() 2691 2692 return limit_exp 2693 2694 if self._match(TokenType.FETCH): 2695 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2696 direction = self._prev.text if direction else "FIRST" 2697 2698 count = self._parse_number() 2699 percent = self._match(TokenType.PERCENT) 2700 2701 self._match_set((TokenType.ROW, TokenType.ROWS)) 2702 2703 only = self._match_text_seq("ONLY") 2704 with_ties = self._match_text_seq("WITH", "TIES") 2705 2706 if only and with_ties: 2707 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2708 2709 return self.expression( 2710 exp.Fetch, 2711 direction=direction, 2712 count=count, 2713 
percent=percent, 2714 with_ties=with_ties, 2715 ) 2716 2717 return this 2718 2719 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2720 if not self._match(TokenType.OFFSET): 2721 return this 2722 2723 count = self._parse_number() 2724 self._match_set((TokenType.ROW, TokenType.ROWS)) 2725 return self.expression(exp.Offset, this=this, expression=count) 2726 2727 def _parse_locks(self) -> t.List[exp.Lock]: 2728 locks = [] 2729 while True: 2730 if self._match_text_seq("FOR", "UPDATE"): 2731 update = True 2732 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2733 "LOCK", "IN", "SHARE", "MODE" 2734 ): 2735 update = False 2736 else: 2737 break 2738 2739 expressions = None 2740 if self._match_text_seq("OF"): 2741 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2742 2743 wait: t.Optional[bool | exp.Expression] = None 2744 if self._match_text_seq("NOWAIT"): 2745 wait = True 2746 elif self._match_text_seq("WAIT"): 2747 wait = self._parse_primary() 2748 elif self._match_text_seq("SKIP", "LOCKED"): 2749 wait = False 2750 2751 locks.append( 2752 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2753 ) 2754 2755 return locks 2756 2757 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2758 if not self._match_set(self.SET_OPERATIONS): 2759 return this 2760 2761 token_type = self._prev.token_type 2762 2763 if token_type == TokenType.UNION: 2764 expression = exp.Union 2765 elif token_type == TokenType.EXCEPT: 2766 expression = exp.Except 2767 else: 2768 expression = exp.Intersect 2769 2770 return self.expression( 2771 expression, 2772 this=this, 2773 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2774 expression=self._parse_set_operations(self._parse_select(nested=True)), 2775 ) 2776 2777 def _parse_expression(self) -> t.Optional[exp.Expression]: 2778 return 
self._parse_alias(self._parse_conjunction()) 2779 2780 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2781 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2782 2783 def _parse_equality(self) -> t.Optional[exp.Expression]: 2784 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2785 2786 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2787 return self._parse_tokens(self._parse_range, self.COMPARISON) 2788 2789 def _parse_range(self) -> t.Optional[exp.Expression]: 2790 this = self._parse_bitwise() 2791 negate = self._match(TokenType.NOT) 2792 2793 if self._match_set(self.RANGE_PARSERS): 2794 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2795 if not expression: 2796 return this 2797 2798 this = expression 2799 elif self._match(TokenType.ISNULL): 2800 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2801 2802 # Postgres supports ISNULL and NOTNULL for conditions. 2803 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2804 if self._match(TokenType.NOTNULL): 2805 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2806 this = self.expression(exp.Not, this=this) 2807 2808 if negate: 2809 this = self.expression(exp.Not, this=this) 2810 2811 if self._match(TokenType.IS): 2812 this = self._parse_is(this) 2813 2814 return this 2815 2816 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2817 index = self._index - 1 2818 negate = self._match(TokenType.NOT) 2819 2820 if self._match_text_seq("DISTINCT", "FROM"): 2821 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2822 return self.expression(klass, this=this, expression=self._parse_expression()) 2823 2824 expression = self._parse_null() or self._parse_boolean() 2825 if not expression: 2826 self._retreat(index) 2827 return None 2828 2829 this = self.expression(exp.Is, this=this, expression=expression) 2830 return self.expression(exp.Not, this=this) if negate else this 2831 
    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of an IN predicate: UNNEST(...), a parenthesized
        list/subquery, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery becomes `query`, anything else is an expression list
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse ``BETWEEN low AND high`` for the given left-hand expression."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing toward the ``INTERVAL '<n>' <unit>`` form."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # Split e.g. '5 day' into value '5' and unit `day`
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operator chains, including << and >> spelled as two tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (per ``self.TERM``)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (per ``self.FACTOR``)."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, falling through to AT TIME ZONE over a typed expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL, a cast written as ``<type> <literal>``, or a plain column expression,
        backtracking when the leading type token turns out not to be a cast."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name followed by a non-literal was not a cast after all
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse one size argument of a parameterized type, e.g. the ``10`` in ``DECIMAL(10)``."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested/parameterized forms (ARRAY<...>, STRUCT<...>,
        DECIMAL(p, s), INT[] ...), backtracking to ``index`` whenever the tokens turn out
        not to form a type.

        Args:
            check_func: when True, bail out (retreat) if the candidate type looks like it is
                actually a function call rather than a type.
            schema: propagated to nested type parsing for column-definition contexts.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # `TYPE(...)` could also be a function call; resolved below under check_func
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array suffixes: INT[], INT[][] ...
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # If a string follows, this was a function call like DATE('...') — not a type
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: ``name: type`` or ``name type``."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ``AT TIME ZONE <zone>`` suffix."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference (and any trailing column operators/brackets)."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (dots, ::casts, JSON operators, brackets) to ``this``."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers left: column becomes table, table becomes db, db becomes catalog
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (with implicit string concatenation),
        a ``.123`` number, or a parenthesized expression/subquery/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a "field": a primary expression, a function call, or an identifier.

        Args:
            any_token: passed through to `_parse_id_var`; allows (almost) any token
                to be treated as an identifier.
            tokens: optional token set that may act as identifiers.
            anonymous_func: when True, function calls are parsed as exp.Anonymous.
        """
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Resolution order: special no-paren parsers, no-paren builtins
        (e.g. CURRENT_DATE), dialect-specific FUNCTION_PARSERS, subquery
        predicates (EXISTS/ANY/...), then the FUNCTIONS table; anything
        unknown falls back to exp.Anonymous.

        Args:
            functions: override for the name -> builder mapping
                (defaults to self.FUNCTIONS).
            anonymous: when True, always build an exp.Anonymous node.
            optional_parens: when True, allow functions callable without parentheses.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Skip over the function name and the opening parenthesis.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Known function: build the typed node and validate its arg list.
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single parameter in a function/procedure definition (name + type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly-qualified UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> x + y`) or, failing that, a regular
        function argument (DISTINCT list, select, or expression)."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as a plain argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                # In `name = value` named-argument style, the LHS is a bare
                # variable, not a column reference.
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an optional parenthesized schema (column defs / constraints) for `this`."""
        index = self._index

        if not self.errors:
            # Speculatively try a nested SELECT first; any errors from the
            # attempt are discarded and the cursor is restored.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type and constraints that follow a column name in a schema."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        # Bare identifier (no type, no constraints) — return it unchanged.
        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either a wrapped list or one value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [(sequence options)]."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>) — a computed column, not an identity.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the constraint following NOT: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint, or return None."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; unnamed constraints are delegated."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint via CONSTRAINT_PARSERS; raises if the
        matched keyword has no registered parser."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)
    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(columns)]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>,
        NOT ENFORCED, DEFERRABLE, ...) as raw strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; `match=False` skips the keyword check."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, either as a column constraint or (with a column
        list) as a table-level key."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` (index/array) or `{...}` (DuckDB struct) after `this`,
        recursing to support chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Slice with an omitted start, e.g. x[:2].
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize the index for dialects with a non-zero INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Extend `this` into a slice if a `:` follows."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, in both function form IF(c, t, f) and statement form
        IF c THEN t [ELSE f] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr) — a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]).

        Args:
            strict: build exp.Cast when True, exp.TryCast otherwise.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT):
            # CAST(... AS DATE/TIMESTAMP FORMAT '...') becomes StrToDate/StrToTime.
            fmt = self._parse_string()

            return self.expression(
                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                this=this,
                format=exp.Literal.string(
                    format_time(
                        fmt.this if fmt else "",
                        self.FORMAT_MAPPING or self.TIME_MAPPING,
                        self.FORMAT_TRIE or self.TIME_TRIE,
                    )
                ),
            )

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT arguments, normalizing NULL handling per dialect flags."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style calls, including the
        WITHIN GROUP (ORDER BY ...) form."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(a, b); argument order
        depends on CONVERT_TYPE_FIRST."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
3759 """ 3760 args = self._parse_csv(self._parse_conjunction) 3761 3762 if len(args) < 3: 3763 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3764 3765 expression, *expressions = args 3766 if not expression: 3767 return None 3768 3769 ifs = [] 3770 for search, result in zip(expressions[::2], expressions[1::2]): 3771 if not search or not result: 3772 return None 3773 3774 if isinstance(search, exp.Literal): 3775 ifs.append( 3776 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3777 ) 3778 elif isinstance(search, exp.Null): 3779 ifs.append( 3780 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3781 ) 3782 else: 3783 cond = exp.or_( 3784 exp.EQ(this=expression.copy(), expression=search), 3785 exp.and_( 3786 exp.Is(this=expression.copy(), expression=exp.Null()), 3787 exp.Is(this=search.copy(), expression=exp.Null()), 3788 copy=False, 3789 ), 3790 copy=False, 3791 ) 3792 ifs.append(exp.If(this=cond, true=result)) 3793 3794 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3795 3796 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3797 self._match_text_seq("KEY") 3798 key = self._parse_field() 3799 self._match(TokenType.COLON) 3800 self._match_text_seq("VALUE") 3801 value = self._parse_field() 3802 3803 if not key and not value: 3804 return None 3805 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3806 3807 def _parse_json_object(self) -> exp.JSONObject: 3808 star = self._parse_star() 3809 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3810 3811 null_handling = None 3812 if self._match_text_seq("NULL", "ON", "NULL"): 3813 null_handling = "NULL ON NULL" 3814 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3815 null_handling = "ABSENT ON NULL" 3816 3817 unique_keys = None 3818 if self._match_text_seq("WITH", "UNIQUE"): 3819 unique_keys = True 3820 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3821 unique_keys = False 3822 3823 self._match_text_seq("KEYS") 3824 3825 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3826 format_json = self._match_text_seq("FORMAT", "JSON") 3827 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3828 3829 return self.expression( 3830 exp.JSONObject, 3831 expressions=expressions, 3832 null_handling=null_handling, 3833 unique_keys=unique_keys, 3834 return_type=return_type, 3835 format_json=format_json, 3836 encoding=encoding, 3837 ) 3838 3839 def _parse_logarithm(self) -> exp.Func: 3840 # Default argument order is base, expression 3841 args = self._parse_csv(self._parse_range) 3842 3843 if len(args) > 1: 3844 if not self.LOG_BASE_FIRST: 3845 args.reverse() 3846 return exp.Log.from_arg_list(args) 3847 3848 return self.expression( 3849 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3850 ) 3851 3852 def _parse_match_against(self) -> exp.MatchAgainst: 3853 expressions = self._parse_csv(self._parse_column) 3854 3855 self._match_text_seq(")", "AGAINST", "(") 3856 3857 this = self._parse_string() 3858 3859 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3860 modifier = "IN NATURAL LANGUAGE MODE" 3861 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3862 modifier = f"{modifier} WITH QUERY EXPANSION" 3863 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3864 modifier = "IN BOOLEAN MODE" 3865 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3866 modifier = "WITH QUERY EXPANSION" 3867 else: 3868 modifier = None 3869 3870 return self.expression( 3871 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3872 ) 3873 3874 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3875 def _parse_open_json(self) -> exp.OpenJSON: 3876 this = self._parse_bitwise() 3877 path = self._match(TokenType.COMMA) and self._parse_string() 3878 3879 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3880 this = self._parse_field(any_token=True) 3881 kind = self._parse_types() 3882 path = self._parse_string() 3883 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3884 3885 return self.expression( 3886 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3887 ) 3888 3889 expressions = None 3890 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3891 self._match_l_paren() 3892 expressions = self._parse_csv(_parse_open_json_column_def) 3893 3894 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3895 3896 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3897 args = self._parse_csv(self._parse_bitwise) 3898 3899 if self._match(TokenType.IN): 3900 return self.expression( 3901 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3902 ) 3903 3904 if haystack_first: 3905 haystack = seq_get(args, 0) 3906 needle = seq_get(args, 1) 3907 else: 3908 needle = seq_get(args, 0) 3909 haystack = seq_get(args, 1) 3910 3911 return self.expression( 3912 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3913 ) 3914 3915 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3916 args = self._parse_csv(self._parse_table) 3917 return exp.JoinHint(this=func_name.upper(), expressions=args) 3918 3919 def _parse_substring(self) -> exp.Substring: 3920 # Postgres supports the form: substring(string [from int] [for int]) 3921 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3922 3923 args = self._parse_csv(self._parse_bitwise) 3924 3925 if self._match(TokenType.FROM): 3926 args.append(self._parse_bitwise()) 3927 if self._match(TokenType.FOR): 3928 args.append(self._parse_bitwise()) 3929 3930 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3931 3932 def _parse_trim(self) -> exp.Trim: 3933 # https://www.w3resource.com/sql/character-functions/trim.php 3934 
# https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3935 3936 position = None 3937 collation = None 3938 3939 if self._match_texts(self.TRIM_TYPES): 3940 position = self._prev.text.upper() 3941 3942 expression = self._parse_bitwise() 3943 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3944 this = self._parse_bitwise() 3945 else: 3946 this = expression 3947 expression = None 3948 3949 if self._match(TokenType.COLLATE): 3950 collation = self._parse_bitwise() 3951 3952 return self.expression( 3953 exp.Trim, this=this, position=position, expression=expression, collation=collation 3954 ) 3955 3956 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3957 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3958 3959 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3960 return self._parse_window(self._parse_id_var(), alias=True) 3961 3962 def _parse_respect_or_ignore_nulls( 3963 self, this: t.Optional[exp.Expression] 3964 ) -> t.Optional[exp.Expression]: 3965 if self._match_text_seq("IGNORE", "NULLS"): 3966 return self.expression(exp.IgnoreNulls, this=this) 3967 if self._match_text_seq("RESPECT", "NULLS"): 3968 return self.expression(exp.RespectNulls, this=this) 3969 return this 3970 3971 def _parse_window( 3972 self, this: t.Optional[exp.Expression], alias: bool = False 3973 ) -> t.Optional[exp.Expression]: 3974 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3975 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3976 self._match_r_paren() 3977 3978 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
3979 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3980 if self._match_text_seq("WITHIN", "GROUP"): 3981 order = self._parse_wrapped(self._parse_order) 3982 this = self.expression(exp.WithinGroup, this=this, expression=order) 3983 3984 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3985 # Some dialects choose to implement and some do not. 3986 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3987 3988 # There is some code above in _parse_lambda that handles 3989 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3990 3991 # The below changes handle 3992 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3993 3994 # Oracle allows both formats 3995 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3996 # and Snowflake chose to do the same for familiarity 3997 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3998 this = self._parse_respect_or_ignore_nulls(this) 3999 4000 # bigquery select from window x AS (partition by ...) 
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (reference to a named window, no parens).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # ROWS/RANGE BETWEEN <start spec> AND <end spec>
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary (e.g. UNBOUNDED PRECEDING, CURRENT ROW, N FOLLOWING)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias or a parenthesized alias list after `this`.

        Args:
            explicit: when True, only accept an alias introduced by AS.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like token.

        Args:
            any_token: accept any non-reserved token as an identifier.
            tokens: token set that may act as identifiers (defaults to ID_VAR_TOKENS).
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, or fall back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and wrap it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, or fall back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, or fall back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a variable/keyword-as-name token into an exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it's a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a variable, falling back to a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, if present."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, if present."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, if present."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces (e.g. @{x})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder (?, :name, @var, ...); rewinds if the candidate
        parser produces nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a SELECT-star EXCEPT (col, ...) column list, if present."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a SELECT-star REPLACE (expr AS col, ...) list, if present."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list of items produced by `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments found at the separator to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-associatively fold binary operators from `expressions`
        (token type -> node class) over operands from `parse_method`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, `sep`-separated list; parens may be optional."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; raise if they're missing
        and not `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return
self._parse_select() or self._parse_set_operations( 4239 self._parse_expression() if alias else self._parse_conjunction() 4240 ) 4241 4242 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4243 return self._parse_query_modifiers( 4244 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4245 ) 4246 4247 def _parse_transaction(self) -> exp.Transaction: 4248 this = None 4249 if self._match_texts(self.TRANSACTION_KIND): 4250 this = self._prev.text 4251 4252 self._match_texts({"TRANSACTION", "WORK"}) 4253 4254 modes = [] 4255 while True: 4256 mode = [] 4257 while self._match(TokenType.VAR): 4258 mode.append(self._prev.text) 4259 4260 if mode: 4261 modes.append(" ".join(mode)) 4262 if not self._match(TokenType.COMMA): 4263 break 4264 4265 return self.expression(exp.Transaction, this=this, modes=modes) 4266 4267 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4268 chain = None 4269 savepoint = None 4270 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4271 4272 self._match_texts({"TRANSACTION", "WORK"}) 4273 4274 if self._match_text_seq("TO"): 4275 self._match_text_seq("SAVEPOINT") 4276 savepoint = self._parse_id_var() 4277 4278 if self._match(TokenType.AND): 4279 chain = not self._match_text_seq("NO") 4280 self._match_text_seq("CHAIN") 4281 4282 if is_rollback: 4283 return self.expression(exp.Rollback, savepoint=savepoint) 4284 4285 return self.expression(exp.Commit, chain=chain) 4286 4287 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4288 if not self._match_text_seq("ADD"): 4289 return None 4290 4291 self._match(TokenType.COLUMN) 4292 exists_column = self._parse_exists(not_=True) 4293 expression = self._parse_column_def(self._parse_field(any_token=True)) 4294 4295 if expression: 4296 expression.set("exists", exists_column) 4297 4298 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4299 if self._match_texts(("FIRST", "AFTER")): 4300 position 
= self._prev.text 4301 column_position = self.expression( 4302 exp.ColumnPosition, this=self._parse_column(), position=position 4303 ) 4304 expression.set("position", column_position) 4305 4306 return expression 4307 4308 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4309 drop = self._match(TokenType.DROP) and self._parse_drop() 4310 if drop and not isinstance(drop, exp.Command): 4311 drop.set("kind", drop.args.get("kind", "COLUMN")) 4312 return drop 4313 4314 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4315 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4316 return self.expression( 4317 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4318 ) 4319 4320 def _parse_add_constraint(self) -> exp.AddConstraint: 4321 this = None 4322 kind = self._prev.token_type 4323 4324 if kind == TokenType.CONSTRAINT: 4325 this = self._parse_id_var() 4326 4327 if self._match_text_seq("CHECK"): 4328 expression = self._parse_wrapped(self._parse_conjunction) 4329 enforced = self._match_text_seq("ENFORCED") 4330 4331 return self.expression( 4332 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4333 ) 4334 4335 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4336 expression = self._parse_foreign_key() 4337 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4338 expression = self._parse_primary_key() 4339 else: 4340 expression = None 4341 4342 return self.expression(exp.AddConstraint, this=this, expression=expression) 4343 4344 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4345 index = self._index - 1 4346 4347 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4348 return self._parse_csv(self._parse_add_constraint) 4349 4350 self._retreat(index) 4351 return self._parse_csv(self._parse_add_column) 4352 4353 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4354 
self._match(TokenType.COLUMN) 4355 column = self._parse_field(any_token=True) 4356 4357 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4358 return self.expression(exp.AlterColumn, this=column, drop=True) 4359 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4360 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4361 4362 self._match_text_seq("SET", "DATA") 4363 return self.expression( 4364 exp.AlterColumn, 4365 this=column, 4366 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4367 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4368 using=self._match(TokenType.USING) and self._parse_conjunction(), 4369 ) 4370 4371 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4372 index = self._index - 1 4373 4374 partition_exists = self._parse_exists() 4375 if self._match(TokenType.PARTITION, advance=False): 4376 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4377 4378 self._retreat(index) 4379 return self._parse_csv(self._parse_drop_column) 4380 4381 def _parse_alter_table_rename(self) -> exp.RenameTable: 4382 self._match_text_seq("TO") 4383 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4384 4385 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4386 start = self._prev 4387 4388 if not self._match(TokenType.TABLE): 4389 return self._parse_as_command(start) 4390 4391 exists = self._parse_exists() 4392 this = self._parse_table(schema=True) 4393 4394 if self._next: 4395 self._advance() 4396 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4397 4398 if parser: 4399 actions = ensure_list(parser(self)) 4400 4401 if not self._curr: 4402 return self.expression( 4403 exp.AlterTable, 4404 this=this, 4405 exists=exists, 4406 actions=actions, 4407 ) 4408 return self._parse_as_command(start) 4409 4410 def _parse_merge(self) -> exp.Merge: 4411 self._match(TokenType.INTO) 4412 target = 
self._parse_table() 4413 4414 self._match(TokenType.USING) 4415 using = self._parse_table() 4416 4417 self._match(TokenType.ON) 4418 on = self._parse_conjunction() 4419 4420 whens = [] 4421 while self._match(TokenType.WHEN): 4422 matched = not self._match(TokenType.NOT) 4423 self._match_text_seq("MATCHED") 4424 source = ( 4425 False 4426 if self._match_text_seq("BY", "TARGET") 4427 else self._match_text_seq("BY", "SOURCE") 4428 ) 4429 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4430 4431 self._match(TokenType.THEN) 4432 4433 if self._match(TokenType.INSERT): 4434 _this = self._parse_star() 4435 if _this: 4436 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4437 else: 4438 then = self.expression( 4439 exp.Insert, 4440 this=self._parse_value(), 4441 expression=self._match(TokenType.VALUES) and self._parse_value(), 4442 ) 4443 elif self._match(TokenType.UPDATE): 4444 expressions = self._parse_star() 4445 if expressions: 4446 then = self.expression(exp.Update, expressions=expressions) 4447 else: 4448 then = self.expression( 4449 exp.Update, 4450 expressions=self._match(TokenType.SET) 4451 and self._parse_csv(self._parse_equality), 4452 ) 4453 elif self._match(TokenType.DELETE): 4454 then = self.expression(exp.Var, this=self._prev.text) 4455 else: 4456 then = None 4457 4458 whens.append( 4459 self.expression( 4460 exp.When, 4461 matched=matched, 4462 source=source, 4463 condition=condition, 4464 then=then, 4465 ) 4466 ) 4467 4468 return self.expression( 4469 exp.Merge, 4470 this=target, 4471 using=using, 4472 on=on, 4473 expressions=whens, 4474 ) 4475 4476 def _parse_show(self) -> t.Optional[exp.Expression]: 4477 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4478 if parser: 4479 return parser(self) 4480 self._advance() 4481 return self.expression(exp.Show, this=self._prev.text.upper()) 4482 4483 def _parse_set_item_assignment( 4484 self, kind: t.Optional[str] = None 4485 ) -> 
t.Optional[exp.Expression]: 4486 index = self._index 4487 4488 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4489 return self._parse_set_transaction(global_=kind == "GLOBAL") 4490 4491 left = self._parse_primary() or self._parse_id_var() 4492 4493 if not self._match_texts(("=", "TO")): 4494 self._retreat(index) 4495 return None 4496 4497 right = self._parse_statement() or self._parse_id_var() 4498 this = self.expression(exp.EQ, this=left, expression=right) 4499 4500 return self.expression(exp.SetItem, this=this, kind=kind) 4501 4502 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4503 self._match_text_seq("TRANSACTION") 4504 characteristics = self._parse_csv( 4505 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4506 ) 4507 return self.expression( 4508 exp.SetItem, 4509 expressions=characteristics, 4510 kind="TRANSACTION", 4511 **{"global": global_}, # type: ignore 4512 ) 4513 4514 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4515 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4516 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4517 4518 def _parse_set(self) -> exp.Set | exp.Command: 4519 index = self._index 4520 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4521 4522 if self._curr: 4523 self._retreat(index) 4524 return self._parse_as_command(self._prev) 4525 4526 return set_ 4527 4528 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4529 for option in options: 4530 if self._match_text_seq(*option.split(" ")): 4531 return exp.var(option) 4532 return None 4533 4534 def _parse_as_command(self, start: Token) -> exp.Command: 4535 while self._curr: 4536 self._advance() 4537 text = self._find_sql(start, self._prev) 4538 size = len(start.text) 4539 return exp.Command(this=text[:size], expression=text[size:]) 4540 4541 def _parse_dict_property(self, this: str) -> 
exp.DictProperty: 4542 settings = [] 4543 4544 self._match_l_paren() 4545 kind = self._parse_id_var() 4546 4547 if self._match(TokenType.L_PAREN): 4548 while True: 4549 key = self._parse_id_var() 4550 value = self._parse_primary() 4551 4552 if not key and value is None: 4553 break 4554 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4555 self._match(TokenType.R_PAREN) 4556 4557 self._match_r_paren() 4558 4559 return self.expression( 4560 exp.DictProperty, 4561 this=this, 4562 kind=kind.this if kind else None, 4563 settings=settings, 4564 ) 4565 4566 def _parse_dict_range(self, this: str) -> exp.DictRange: 4567 self._match_l_paren() 4568 has_min = self._match_text_seq("MIN") 4569 if has_min: 4570 min = self._parse_var() or self._parse_primary() 4571 self._match_text_seq("MAX") 4572 max = self._parse_var() or self._parse_primary() 4573 else: 4574 max = self._parse_var() or self._parse_primary() 4575 min = exp.Literal.number(0) 4576 self._match_r_paren() 4577 return self.expression(exp.DictRange, this=this, min=min, max=max) 4578 4579 def _find_parser( 4580 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4581 ) -> t.Optional[t.Callable]: 4582 if not self._curr: 4583 return None 4584 4585 index = self._index 4586 this = [] 4587 while True: 4588 # The current token might be multiple words 4589 curr = self._curr.text.upper() 4590 key = curr.split(" ") 4591 this.append(curr) 4592 4593 self._advance() 4594 result, trie = in_trie(trie, key) 4595 if result == TrieResult.FAILED: 4596 break 4597 4598 if result == TrieResult.EXISTS: 4599 subparser = parsers[" ".join(this)] 4600 return subparser 4601 4602 self._retreat(index) 4603 return None 4604 4605 def _match(self, token_type, advance=True, expression=None): 4606 if not self._curr: 4607 return None 4608 4609 if self._curr.token_type == token_type: 4610 if advance: 4611 self._advance() 4612 self._add_comments(expression) 4613 return True 4614 4615 return None 4616 4617 def _match_set(self, 
types, advance=True): 4618 if not self._curr: 4619 return None 4620 4621 if self._curr.token_type in types: 4622 if advance: 4623 self._advance() 4624 return True 4625 4626 return None 4627 4628 def _match_pair(self, token_type_a, token_type_b, advance=True): 4629 if not self._curr or not self._next: 4630 return None 4631 4632 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4633 if advance: 4634 self._advance(2) 4635 return True 4636 4637 return None 4638 4639 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4640 if not self._match(TokenType.L_PAREN, expression=expression): 4641 self.raise_error("Expecting (") 4642 4643 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4644 if not self._match(TokenType.R_PAREN, expression=expression): 4645 self.raise_error("Expecting )") 4646 4647 def _match_texts(self, texts, advance=True): 4648 if self._curr and self._curr.text.upper() in texts: 4649 if advance: 4650 self._advance() 4651 return True 4652 return False 4653 4654 def _match_text_seq(self, *texts, advance=True): 4655 index = self._index 4656 for text in texts: 4657 if self._curr and self._curr.text.upper() == text: 4658 self._advance() 4659 else: 4660 self._retreat(index) 4661 return False 4662 4663 if not advance: 4664 self._retreat(index) 4665 4666 return True 4667 4668 @t.overload 4669 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4670 ... 4671 4672 @t.overload 4673 def _replace_columns_with_dots( 4674 self, this: t.Optional[exp.Expression] 4675 ) -> t.Optional[exp.Expression]: 4676 ... 
4677 4678 def _replace_columns_with_dots(self, this): 4679 if isinstance(this, exp.Dot): 4680 exp.replace_children(this, self._replace_columns_with_dots) 4681 elif isinstance(this, exp.Column): 4682 exp.replace_children(this, self._replace_columns_with_dots) 4683 table = this.args.get("table") 4684 this = ( 4685 self.expression(exp.Dot, this=table, expression=this.this) 4686 if table 4687 else self.expression(exp.Var, this=this.name) 4688 ) 4689 elif isinstance(this, exp.Identifier): 4690 this = self.expression(exp.Var, this=this.name) 4691 4692 return this 4693 4694 def _replace_lambda( 4695 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4696 ) -> t.Optional[exp.Expression]: 4697 if not node: 4698 return node 4699 4700 for column in node.find_all(exp.Column): 4701 if column.parts[0].name in lambda_variables: 4702 dot_or_id = column.to_dot() if column.table else column.this 4703 parent = column.parent 4704 4705 while isinstance(parent, exp.Dot): 4706 if not isinstance(parent.parent, exp.Dot): 4707 parent.replace(dot_or_id) 4708 break 4709 parent = parent.parent 4710 else: 4711 if column is node: 4712 node = dot_or_id 4713 else: 4714 column.replace(dot_or_id) 4715 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
833 def __init__( 834 self, 835 error_level: t.Optional[ErrorLevel] = None, 836 error_message_context: int = 100, 837 max_errors: int = 3, 838 ): 839 self.error_level = error_level or ErrorLevel.IMMEDIATE 840 self.error_message_context = error_message_context 841 self.max_errors = max_errors 842 self.reset()
854 def parse( 855 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 856 ) -> t.List[t.Optional[exp.Expression]]: 857 """ 858 Parses a list of tokens and returns a list of syntax trees, one tree 859 per parsed SQL statement. 860 861 Args: 862 raw_tokens: The list of tokens. 863 sql: The original SQL string, used to produce helpful debug messages. 864 865 Returns: 866 The list of the produced syntax trees. 867 """ 868 return self._parse( 869 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 870 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
872 def parse_into( 873 self, 874 expression_types: exp.IntoType, 875 raw_tokens: t.List[Token], 876 sql: t.Optional[str] = None, 877 ) -> t.List[t.Optional[exp.Expression]]: 878 """ 879 Parses a list of tokens into a given Expression type. If a collection of Expression 880 types is given instead, this method will try to parse the token list into each one 881 of them, stopping at the first for which the parsing succeeds. 882 883 Args: 884 expression_types: The expression type(s) to try and parse the token list into. 885 raw_tokens: The list of tokens. 886 sql: The original SQL string, used to produce helpful debug messages. 887 888 Returns: 889 The target Expression. 890 """ 891 errors = [] 892 for expression_type in ensure_list(expression_types): 893 parser = self.EXPRESSION_PARSERS.get(expression_type) 894 if not parser: 895 raise TypeError(f"No parser registered for {expression_type}") 896 897 try: 898 return self._parse(parser, raw_tokens, sql) 899 except ParseError as e: 900 e.errors[0]["into_expression"] = expression_type 901 errors.append(e) 902 903 raise ParseError( 904 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 905 errors=merge_errors(errors), 906 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
943 def check_errors(self) -> None: 944 """Logs or raises any found errors, depending on the chosen error level setting.""" 945 if self.error_level == ErrorLevel.WARN: 946 for error in self.errors: 947 logger.error(str(error)) 948 elif self.error_level == ErrorLevel.RAISE and self.errors: 949 raise ParseError( 950 concat_messages(self.errors, self.max_errors), 951 errors=merge_errors(self.errors), 952 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        # Anchor the error to the most relevant token available; fall back to an
        # empty string token when the parser has no position at all.
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        # Slice the surrounding SQL for context, clamped to the string bounds.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # \033[4m ... \033[0m underlines the offending fragment in terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f" {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
982 def expression( 983 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 984 ) -> E: 985 """ 986 Creates a new, validated Expression. 987 988 Args: 989 exp_class: The expression class to instantiate. 990 comments: An optional list of comments to attach to the expression. 991 kwargs: The arguments to set for the expression along with their respective values. 992 993 Returns: 994 The target expression. 995 """ 996 instance = exp_class(**kwargs) 997 instance.add_comments(comments) if comments else self._add_comments(instance) 998 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1005 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1006 """ 1007 Validates an Expression, making sure that all its mandatory arguments are set. 1008 1009 Args: 1010 expression: The expression to validate. 1011 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1012 1013 Returns: 1014 The validated expression. 1015 """ 1016 if self.error_level != ErrorLevel.IGNORE: 1017 for error_message in expression.error_messages(args): 1018 self.raise_error(error_message) 1019 1020 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.