# Module: sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 NESTED_TYPE_TOKENS = { 106 TokenType.ARRAY, 107 TokenType.MAP, 108 TokenType.NULLABLE, 109 TokenType.STRUCT, 110 } 111 112 TYPE_TOKENS = { 113 TokenType.BIT, 114 TokenType.BOOLEAN, 115 TokenType.TINYINT, 116 TokenType.UTINYINT, 117 TokenType.SMALLINT, 118 TokenType.USMALLINT, 119 TokenType.INT, 120 TokenType.UINT, 121 TokenType.BIGINT, 122 TokenType.UBIGINT, 123 TokenType.INT128, 124 TokenType.UINT128, 125 TokenType.INT256, 126 TokenType.UINT256, 127 TokenType.FLOAT, 128 TokenType.DOUBLE, 129 TokenType.CHAR, 130 TokenType.NCHAR, 131 TokenType.VARCHAR, 132 TokenType.NVARCHAR, 133 
TokenType.TEXT, 134 TokenType.MEDIUMTEXT, 135 TokenType.LONGTEXT, 136 TokenType.MEDIUMBLOB, 137 TokenType.LONGBLOB, 138 TokenType.BINARY, 139 TokenType.VARBINARY, 140 TokenType.JSON, 141 TokenType.JSONB, 142 TokenType.INTERVAL, 143 TokenType.TIME, 144 TokenType.TIMESTAMP, 145 TokenType.TIMESTAMPTZ, 146 TokenType.TIMESTAMPLTZ, 147 TokenType.DATETIME, 148 TokenType.DATETIME64, 149 TokenType.DATE, 150 TokenType.INT4RANGE, 151 TokenType.INT4MULTIRANGE, 152 TokenType.INT8RANGE, 153 TokenType.INT8MULTIRANGE, 154 TokenType.NUMRANGE, 155 TokenType.NUMMULTIRANGE, 156 TokenType.TSRANGE, 157 TokenType.TSMULTIRANGE, 158 TokenType.TSTZRANGE, 159 TokenType.TSTZMULTIRANGE, 160 TokenType.DATERANGE, 161 TokenType.DATEMULTIRANGE, 162 TokenType.DECIMAL, 163 TokenType.BIGDECIMAL, 164 TokenType.UUID, 165 TokenType.GEOGRAPHY, 166 TokenType.GEOMETRY, 167 TokenType.HLLSKETCH, 168 TokenType.HSTORE, 169 TokenType.PSEUDO_TYPE, 170 TokenType.SUPER, 171 TokenType.SERIAL, 172 TokenType.SMALLSERIAL, 173 TokenType.BIGSERIAL, 174 TokenType.XML, 175 TokenType.UNIQUEIDENTIFIER, 176 TokenType.MONEY, 177 TokenType.SMALLMONEY, 178 TokenType.ROWVERSION, 179 TokenType.IMAGE, 180 TokenType.VARIANT, 181 TokenType.OBJECT, 182 TokenType.INET, 183 *NESTED_TYPE_TOKENS, 184 } 185 186 SUBQUERY_PREDICATES = { 187 TokenType.ANY: exp.Any, 188 TokenType.ALL: exp.All, 189 TokenType.EXISTS: exp.Exists, 190 TokenType.SOME: exp.Any, 191 } 192 193 RESERVED_KEYWORDS = { 194 *Tokenizer.SINGLE_TOKENS.values(), 195 TokenType.SELECT, 196 } 197 198 DB_CREATABLES = { 199 TokenType.DATABASE, 200 TokenType.SCHEMA, 201 TokenType.TABLE, 202 TokenType.VIEW, 203 TokenType.DICTIONARY, 204 } 205 206 CREATABLES = { 207 TokenType.COLUMN, 208 TokenType.FUNCTION, 209 TokenType.INDEX, 210 TokenType.PROCEDURE, 211 *DB_CREATABLES, 212 } 213 214 # Tokens that can represent identifiers 215 ID_VAR_TOKENS = { 216 TokenType.VAR, 217 TokenType.ANTI, 218 TokenType.APPLY, 219 TokenType.ASC, 220 TokenType.AUTO_INCREMENT, 221 TokenType.BEGIN, 222 
TokenType.CACHE, 223 TokenType.CASE, 224 TokenType.COLLATE, 225 TokenType.COMMAND, 226 TokenType.COMMENT, 227 TokenType.COMMIT, 228 TokenType.CONSTRAINT, 229 TokenType.DEFAULT, 230 TokenType.DELETE, 231 TokenType.DESC, 232 TokenType.DESCRIBE, 233 TokenType.DICTIONARY, 234 TokenType.DIV, 235 TokenType.END, 236 TokenType.EXECUTE, 237 TokenType.ESCAPE, 238 TokenType.FALSE, 239 TokenType.FIRST, 240 TokenType.FILTER, 241 TokenType.FORMAT, 242 TokenType.FULL, 243 TokenType.IF, 244 TokenType.IS, 245 TokenType.ISNULL, 246 TokenType.INTERVAL, 247 TokenType.KEEP, 248 TokenType.LEFT, 249 TokenType.LOAD, 250 TokenType.MERGE, 251 TokenType.NATURAL, 252 TokenType.NEXT, 253 TokenType.OFFSET, 254 TokenType.ORDINALITY, 255 TokenType.OVERWRITE, 256 TokenType.PARTITION, 257 TokenType.PERCENT, 258 TokenType.PIVOT, 259 TokenType.PRAGMA, 260 TokenType.RANGE, 261 TokenType.REFERENCES, 262 TokenType.RIGHT, 263 TokenType.ROW, 264 TokenType.ROWS, 265 TokenType.SEMI, 266 TokenType.SET, 267 TokenType.SETTINGS, 268 TokenType.SHOW, 269 TokenType.TEMPORARY, 270 TokenType.TOP, 271 TokenType.TRUE, 272 TokenType.UNIQUE, 273 TokenType.UNPIVOT, 274 TokenType.UPDATE, 275 TokenType.VOLATILE, 276 TokenType.WINDOW, 277 *CREATABLES, 278 *SUBQUERY_PREDICATES, 279 *TYPE_TOKENS, 280 *NO_PAREN_FUNCTIONS, 281 } 282 283 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 284 285 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 286 TokenType.APPLY, 287 TokenType.ASOF, 288 TokenType.FULL, 289 TokenType.LEFT, 290 TokenType.LOCK, 291 TokenType.NATURAL, 292 TokenType.OFFSET, 293 TokenType.RIGHT, 294 TokenType.WINDOW, 295 } 296 297 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 298 299 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 300 301 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 302 303 FUNC_TOKENS = { 304 TokenType.COMMAND, 305 TokenType.CURRENT_DATE, 306 TokenType.CURRENT_DATETIME, 307 TokenType.CURRENT_TIMESTAMP, 308 TokenType.CURRENT_TIME, 309 TokenType.CURRENT_USER, 310 TokenType.FILTER, 311 
TokenType.FIRST, 312 TokenType.FORMAT, 313 TokenType.GLOB, 314 TokenType.IDENTIFIER, 315 TokenType.INDEX, 316 TokenType.ISNULL, 317 TokenType.ILIKE, 318 TokenType.LIKE, 319 TokenType.MERGE, 320 TokenType.OFFSET, 321 TokenType.PRIMARY_KEY, 322 TokenType.RANGE, 323 TokenType.REPLACE, 324 TokenType.ROW, 325 TokenType.UNNEST, 326 TokenType.VAR, 327 TokenType.LEFT, 328 TokenType.RIGHT, 329 TokenType.DATE, 330 TokenType.DATETIME, 331 TokenType.TABLE, 332 TokenType.TIMESTAMP, 333 TokenType.TIMESTAMPTZ, 334 TokenType.WINDOW, 335 *TYPE_TOKENS, 336 *SUBQUERY_PREDICATES, 337 } 338 339 CONJUNCTION = { 340 TokenType.AND: exp.And, 341 TokenType.OR: exp.Or, 342 } 343 344 EQUALITY = { 345 TokenType.EQ: exp.EQ, 346 TokenType.NEQ: exp.NEQ, 347 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 348 } 349 350 COMPARISON = { 351 TokenType.GT: exp.GT, 352 TokenType.GTE: exp.GTE, 353 TokenType.LT: exp.LT, 354 TokenType.LTE: exp.LTE, 355 } 356 357 BITWISE = { 358 TokenType.AMP: exp.BitwiseAnd, 359 TokenType.CARET: exp.BitwiseXor, 360 TokenType.PIPE: exp.BitwiseOr, 361 TokenType.DPIPE: exp.DPipe, 362 } 363 364 TERM = { 365 TokenType.DASH: exp.Sub, 366 TokenType.PLUS: exp.Add, 367 TokenType.MOD: exp.Mod, 368 TokenType.COLLATE: exp.Collate, 369 } 370 371 FACTOR = { 372 TokenType.DIV: exp.IntDiv, 373 TokenType.LR_ARROW: exp.Distance, 374 TokenType.SLASH: exp.Div, 375 TokenType.STAR: exp.Mul, 376 } 377 378 TIMESTAMPS = { 379 TokenType.TIME, 380 TokenType.TIMESTAMP, 381 TokenType.TIMESTAMPTZ, 382 TokenType.TIMESTAMPLTZ, 383 } 384 385 SET_OPERATIONS = { 386 TokenType.UNION, 387 TokenType.INTERSECT, 388 TokenType.EXCEPT, 389 } 390 391 JOIN_METHODS = { 392 TokenType.NATURAL, 393 TokenType.ASOF, 394 } 395 396 JOIN_SIDES = { 397 TokenType.LEFT, 398 TokenType.RIGHT, 399 TokenType.FULL, 400 } 401 402 JOIN_KINDS = { 403 TokenType.INNER, 404 TokenType.OUTER, 405 TokenType.CROSS, 406 TokenType.SEMI, 407 TokenType.ANTI, 408 } 409 410 JOIN_HINTS: t.Set[str] = set() 411 412 LAMBDAS = { 413 TokenType.ARROW: lambda self, 
expressions: self.expression( 414 exp.Lambda, 415 this=self._replace_lambda( 416 self._parse_conjunction(), 417 {node.name for node in expressions}, 418 ), 419 expressions=expressions, 420 ), 421 TokenType.FARROW: lambda self, expressions: self.expression( 422 exp.Kwarg, 423 this=exp.var(expressions[0].name), 424 expression=self._parse_conjunction(), 425 ), 426 } 427 428 COLUMN_OPERATORS = { 429 TokenType.DOT: None, 430 TokenType.DCOLON: lambda self, this, to: self.expression( 431 exp.Cast if self.STRICT_CAST else exp.TryCast, 432 this=this, 433 to=to, 434 ), 435 TokenType.ARROW: lambda self, this, path: self.expression( 436 exp.JSONExtract, 437 this=this, 438 expression=path, 439 ), 440 TokenType.DARROW: lambda self, this, path: self.expression( 441 exp.JSONExtractScalar, 442 this=this, 443 expression=path, 444 ), 445 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 446 exp.JSONBExtract, 447 this=this, 448 expression=path, 449 ), 450 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 451 exp.JSONBExtractScalar, 452 this=this, 453 expression=path, 454 ), 455 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 456 exp.JSONBContains, 457 this=this, 458 expression=key, 459 ), 460 } 461 462 EXPRESSION_PARSERS = { 463 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"), 464 exp.Column: lambda self: self._parse_column(), 465 exp.Condition: lambda self: self._parse_conjunction(), 466 exp.DataType: lambda self: self._parse_types(), 467 exp.Expression: lambda self: self._parse_statement(), 468 exp.From: lambda self: self._parse_from(), 469 exp.Group: lambda self: self._parse_group(), 470 exp.Having: lambda self: self._parse_having(), 471 exp.Identifier: lambda self: self._parse_id_var(), 472 exp.Join: lambda self: self._parse_join(), 473 exp.Lambda: lambda self: self._parse_lambda(), 474 exp.Lateral: lambda self: self._parse_lateral(), 475 exp.Limit: lambda self: self._parse_limit(), 476 exp.Offset: lambda self: 
self._parse_offset(), 477 exp.Order: lambda self: self._parse_order(), 478 exp.Ordered: lambda self: self._parse_ordered(), 479 exp.Properties: lambda self: self._parse_properties(), 480 exp.Qualify: lambda self: self._parse_qualify(), 481 exp.Returning: lambda self: self._parse_returning(), 482 exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"), 483 exp.Table: lambda self: self._parse_table_parts(), 484 exp.TableAlias: lambda self: self._parse_table_alias(), 485 exp.Where: lambda self: self._parse_where(), 486 exp.Window: lambda self: self._parse_named_window(), 487 exp.With: lambda self: self._parse_with(), 488 "JOIN_TYPE": lambda self: self._parse_join_parts(), 489 } 490 491 STATEMENT_PARSERS = { 492 TokenType.ALTER: lambda self: self._parse_alter(), 493 TokenType.BEGIN: lambda self: self._parse_transaction(), 494 TokenType.CACHE: lambda self: self._parse_cache(), 495 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 496 TokenType.COMMENT: lambda self: self._parse_comment(), 497 TokenType.CREATE: lambda self: self._parse_create(), 498 TokenType.DELETE: lambda self: self._parse_delete(), 499 TokenType.DESC: lambda self: self._parse_describe(), 500 TokenType.DESCRIBE: lambda self: self._parse_describe(), 501 TokenType.DROP: lambda self: self._parse_drop(), 502 TokenType.END: lambda self: self._parse_commit_or_rollback(), 503 TokenType.FROM: lambda self: exp.select("*").from_( 504 t.cast(exp.From, self._parse_from(skip_from_token=True)) 505 ), 506 TokenType.INSERT: lambda self: self._parse_insert(), 507 TokenType.LOAD: lambda self: self._parse_load(), 508 TokenType.MERGE: lambda self: self._parse_merge(), 509 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 510 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 511 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 512 TokenType.SET: lambda self: self._parse_set(), 513 TokenType.UNCACHE: lambda self: 
self._parse_uncache(), 514 TokenType.UPDATE: lambda self: self._parse_update(), 515 TokenType.USE: lambda self: self.expression( 516 exp.Use, 517 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 518 and exp.var(self._prev.text), 519 this=self._parse_table(schema=False), 520 ), 521 } 522 523 UNARY_PARSERS = { 524 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 525 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 526 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 527 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 528 } 529 530 PRIMARY_PARSERS = { 531 TokenType.STRING: lambda self, token: self.expression( 532 exp.Literal, this=token.text, is_string=True 533 ), 534 TokenType.NUMBER: lambda self, token: self.expression( 535 exp.Literal, this=token.text, is_string=False 536 ), 537 TokenType.STAR: lambda self, _: self.expression( 538 exp.Star, 539 **{"except": self._parse_except(), "replace": self._parse_replace()}, 540 ), 541 TokenType.NULL: lambda self, _: self.expression(exp.Null), 542 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 543 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 544 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 545 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 546 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 547 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 548 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 549 exp.National, this=token.text 550 ), 551 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 552 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 553 } 554 555 PLACEHOLDER_PARSERS = { 
556 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 557 TokenType.PARAMETER: lambda self: self._parse_parameter(), 558 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 559 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 560 else None, 561 } 562 563 RANGE_PARSERS = { 564 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 565 TokenType.GLOB: binary_range_parser(exp.Glob), 566 TokenType.ILIKE: binary_range_parser(exp.ILike), 567 TokenType.IN: lambda self, this: self._parse_in(this), 568 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 569 TokenType.IS: lambda self, this: self._parse_is(this), 570 TokenType.LIKE: binary_range_parser(exp.Like), 571 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 572 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 573 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 574 } 575 576 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 577 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 578 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 579 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 580 "CHARACTER SET": lambda self: self._parse_character_set(), 581 "CHECKSUM": lambda self: self._parse_checksum(), 582 "CLUSTER": lambda self: self._parse_cluster(), 583 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 584 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 585 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 586 "DEFINER": lambda self: self._parse_definer(), 587 "DETERMINISTIC": lambda self: self.expression( 588 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 589 ), 590 "DISTKEY": lambda self: self._parse_distkey(), 591 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 592 "ENGINE": lambda self: 
self._parse_property_assignment(exp.EngineProperty), 593 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 594 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 595 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 596 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 597 "FREESPACE": lambda self: self._parse_freespace(), 598 "IMMUTABLE": lambda self: self.expression( 599 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 600 ), 601 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 602 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 603 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 604 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 605 "LIKE": lambda self: self._parse_create_like(), 606 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 607 "LOCK": lambda self: self._parse_locking(), 608 "LOCKING": lambda self: self._parse_locking(), 609 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 610 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 611 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 612 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 613 "NO": lambda self: self._parse_no_property(), 614 "ON": lambda self: self._parse_on_property(), 615 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 616 "PARTITION BY": lambda self: self._parse_partitioned_by(), 617 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 618 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 619 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 620 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 621 "RETURNS": lambda self: self._parse_returns(), 622 "ROW": lambda self: self._parse_row(), 623 "ROW_FORMAT": lambda self: 
self._parse_property_assignment(exp.RowFormatProperty), 624 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 625 "SETTINGS": lambda self: self.expression( 626 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 627 ), 628 "SORTKEY": lambda self: self._parse_sortkey(), 629 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 630 "STABLE": lambda self: self.expression( 631 exp.StabilityProperty, this=exp.Literal.string("STABLE") 632 ), 633 "STORED": lambda self: self._parse_stored(), 634 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 635 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 636 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 637 "TO": lambda self: self._parse_to_table(), 638 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 639 "TTL": lambda self: self._parse_ttl(), 640 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 641 "VOLATILE": lambda self: self._parse_volatile_property(), 642 "WITH": lambda self: self._parse_with_property(), 643 } 644 645 CONSTRAINT_PARSERS = { 646 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 647 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 648 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 649 "CHARACTER SET": lambda self: self.expression( 650 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 651 ), 652 "CHECK": lambda self: self.expression( 653 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 654 ), 655 "COLLATE": lambda self: self.expression( 656 exp.CollateColumnConstraint, this=self._parse_var() 657 ), 658 "COMMENT": lambda self: self.expression( 659 exp.CommentColumnConstraint, this=self._parse_string() 660 ), 661 "COMPRESS": lambda self: self._parse_compress(), 662 "DEFAULT": lambda self: self.expression( 663 exp.DefaultColumnConstraint, 
this=self._parse_bitwise() 664 ), 665 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 666 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 667 "FORMAT": lambda self: self.expression( 668 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 669 ), 670 "GENERATED": lambda self: self._parse_generated_as_identity(), 671 "IDENTITY": lambda self: self._parse_auto_increment(), 672 "INLINE": lambda self: self._parse_inline(), 673 "LIKE": lambda self: self._parse_create_like(), 674 "NOT": lambda self: self._parse_not_constraint(), 675 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 676 "ON": lambda self: self._match(TokenType.UPDATE) 677 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 678 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 679 "PRIMARY KEY": lambda self: self._parse_primary_key(), 680 "REFERENCES": lambda self: self._parse_references(match=False), 681 "TITLE": lambda self: self.expression( 682 exp.TitleColumnConstraint, this=self._parse_var_or_string() 683 ), 684 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 685 "UNIQUE": lambda self: self._parse_unique(), 686 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 687 } 688 689 ALTER_PARSERS = { 690 "ADD": lambda self: self._parse_alter_table_add(), 691 "ALTER": lambda self: self._parse_alter_table_alter(), 692 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 693 "DROP": lambda self: self._parse_alter_table_drop(), 694 "RENAME": lambda self: self._parse_alter_table_rename(), 695 } 696 697 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 698 699 NO_PAREN_FUNCTION_PARSERS = { 700 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 701 TokenType.CASE: lambda self: self._parse_case(), 
702 TokenType.IF: lambda self: self._parse_if(), 703 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 704 exp.NextValueFor, 705 this=self._parse_column(), 706 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 707 ), 708 } 709 710 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 711 712 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 713 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 714 "CONCAT": lambda self: self._parse_concat(), 715 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 716 "DECODE": lambda self: self._parse_decode(), 717 "EXTRACT": lambda self: self._parse_extract(), 718 "JSON_OBJECT": lambda self: self._parse_json_object(), 719 "LOG": lambda self: self._parse_logarithm(), 720 "MATCH": lambda self: self._parse_match_against(), 721 "OPENJSON": lambda self: self._parse_open_json(), 722 "POSITION": lambda self: self._parse_position(), 723 "SAFE_CAST": lambda self: self._parse_cast(False), 724 "STRING_AGG": lambda self: self._parse_string_agg(), 725 "SUBSTRING": lambda self: self._parse_substring(), 726 "TRIM": lambda self: self._parse_trim(), 727 "TRY_CAST": lambda self: self._parse_cast(False), 728 "TRY_CONVERT": lambda self: self._parse_convert(False), 729 } 730 731 QUERY_MODIFIER_PARSERS = { 732 "joins": lambda self: list(iter(self._parse_join, None)), 733 "laterals": lambda self: list(iter(self._parse_lateral, None)), 734 "match": lambda self: self._parse_match_recognize(), 735 "where": lambda self: self._parse_where(), 736 "group": lambda self: self._parse_group(), 737 "having": lambda self: self._parse_having(), 738 "qualify": lambda self: self._parse_qualify(), 739 "windows": lambda self: self._parse_window_clause(), 740 "order": lambda self: self._parse_order(), 741 "limit": lambda self: self._parse_limit(), 742 "offset": lambda self: self._parse_offset(), 743 "locks": lambda self: self._parse_locks(), 744 "sample": lambda self: self._parse_table_sample(as_modifier=True), 745 } 746 747 SET_PARSERS 
= { 748 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 749 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 750 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 751 "TRANSACTION": lambda self: self._parse_set_transaction(), 752 } 753 754 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 755 756 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 757 758 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 759 760 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 761 762 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 763 764 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 765 TRANSACTION_CHARACTERISTICS = { 766 "ISOLATION LEVEL REPEATABLE READ", 767 "ISOLATION LEVEL READ COMMITTED", 768 "ISOLATION LEVEL READ UNCOMMITTED", 769 "ISOLATION LEVEL SERIALIZABLE", 770 "READ WRITE", 771 "READ ONLY", 772 } 773 774 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 775 776 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 777 778 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 779 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 780 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 781 782 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 783 784 STRICT_CAST = True 785 786 CONCAT_NULL_OUTPUTS_STRING = False # A NULL arg in CONCAT yields NULL by default 787 788 CONVERT_TYPE_FIRST = False 789 790 PREFIXED_PIVOT_COLUMNS = False 791 IDENTIFY_PIVOT_STRINGS = False 792 793 LOG_BASE_FIRST = True 794 LOG_DEFAULTS_TO_LN = False 795 796 __slots__ = ( 797 "error_level", 798 "error_message_context", 799 "max_errors", 800 "sql", 801 "errors", 802 "_tokens", 803 "_index", 804 "_curr", 805 "_next", 806 "_prev", 807 "_prev_comments", 808 ) 809 810 # Autofilled 811 INDEX_OFFSET: int = 0 812 UNNEST_COLUMN_ONLY: bool = False 813 ALIAS_POST_TABLESAMPLE: bool = False 814 STRICT_STRING_CONCAT = False 815 
NULL_ORDERING: str = "nulls_are_small" 816 SHOW_TRIE: t.Dict = {} 817 SET_TRIE: t.Dict = {} 818 FORMAT_MAPPING: t.Dict[str, str] = {} 819 FORMAT_TRIE: t.Dict = {} 820 TIME_MAPPING: t.Dict[str, str] = {} 821 TIME_TRIE: t.Dict = {} 822 823 def __init__( 824 self, 825 error_level: t.Optional[ErrorLevel] = None, 826 error_message_context: int = 100, 827 max_errors: int = 3, 828 ): 829 self.error_level = error_level or ErrorLevel.IMMEDIATE 830 self.error_message_context = error_message_context 831 self.max_errors = max_errors 832 self.reset() 833 834 def reset(self): 835 self.sql = "" 836 self.errors = [] 837 self._tokens = [] 838 self._index = 0 839 self._curr = None 840 self._next = None 841 self._prev = None 842 self._prev_comments = None 843 844 def parse( 845 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 846 ) -> t.List[t.Optional[exp.Expression]]: 847 """ 848 Parses a list of tokens and returns a list of syntax trees, one tree 849 per parsed SQL statement. 850 851 Args: 852 raw_tokens: The list of tokens. 853 sql: The original SQL string, used to produce helpful debug messages. 854 855 Returns: 856 The list of the produced syntax trees. 857 """ 858 return self._parse( 859 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 860 ) 861 862 def parse_into( 863 self, 864 expression_types: exp.IntoType, 865 raw_tokens: t.List[Token], 866 sql: t.Optional[str] = None, 867 ) -> t.List[t.Optional[exp.Expression]]: 868 """ 869 Parses a list of tokens into a given Expression type. If a collection of Expression 870 types is given instead, this method will try to parse the token list into each one 871 of them, stopping at the first for which the parsing succeeds. 872 873 Args: 874 expression_types: The expression type(s) to try and parse the token list into. 875 raw_tokens: The list of tokens. 876 sql: The original SQL string, used to produce helpful debug messages. 877 878 Returns: 879 The target Expression. 
880 """ 881 errors = [] 882 for expression_type in ensure_list(expression_types): 883 parser = self.EXPRESSION_PARSERS.get(expression_type) 884 if not parser: 885 raise TypeError(f"No parser registered for {expression_type}") 886 887 try: 888 return self._parse(parser, raw_tokens, sql) 889 except ParseError as e: 890 e.errors[0]["into_expression"] = expression_type 891 errors.append(e) 892 893 raise ParseError( 894 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 895 errors=merge_errors(errors), 896 ) from errors[-1] 897 898 def _parse( 899 self, 900 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 901 raw_tokens: t.List[Token], 902 sql: t.Optional[str] = None, 903 ) -> t.List[t.Optional[exp.Expression]]: 904 self.reset() 905 self.sql = sql or "" 906 907 total = len(raw_tokens) 908 chunks: t.List[t.List[Token]] = [[]] 909 910 for i, token in enumerate(raw_tokens): 911 if token.token_type == TokenType.SEMICOLON: 912 if i < total - 1: 913 chunks.append([]) 914 else: 915 chunks[-1].append(token) 916 917 expressions = [] 918 919 for tokens in chunks: 920 self._index = -1 921 self._tokens = tokens 922 self._advance() 923 924 expressions.append(parse_method(self)) 925 926 if self._index < len(self._tokens): 927 self.raise_error("Invalid expression / Unexpected token") 928 929 self.check_errors() 930 931 return expressions 932 933 def check_errors(self) -> None: 934 """Logs or raises any found errors, depending on the chosen error level setting.""" 935 if self.error_level == ErrorLevel.WARN: 936 for error in self.errors: 937 logger.error(str(error)) 938 elif self.error_level == ErrorLevel.RAISE and self.errors: 939 raise ParseError( 940 concat_messages(self.errors, self.max_errors), 941 errors=merge_errors(self.errors), 942 ) 943 944 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 945 """ 946 Appends an error in the list of recorded errors or raises it, depending on the chosen 947 error level setting. 
948 """ 949 token = token or self._curr or self._prev or Token.string("") 950 start = token.start 951 end = token.end + 1 952 start_context = self.sql[max(start - self.error_message_context, 0) : start] 953 highlight = self.sql[start:end] 954 end_context = self.sql[end : end + self.error_message_context] 955 956 error = ParseError.new( 957 f"{message}. Line {token.line}, Col: {token.col}.\n" 958 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 959 description=message, 960 line=token.line, 961 col=token.col, 962 start_context=start_context, 963 highlight=highlight, 964 end_context=end_context, 965 ) 966 967 if self.error_level == ErrorLevel.IMMEDIATE: 968 raise error 969 970 self.errors.append(error) 971 972 def expression( 973 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 974 ) -> E: 975 """ 976 Creates a new, validated Expression. 977 978 Args: 979 exp_class: The expression class to instantiate. 980 comments: An optional list of comments to attach to the expression. 981 kwargs: The arguments to set for the expression along with their respective values. 982 983 Returns: 984 The target expression. 985 """ 986 instance = exp_class(**kwargs) 987 instance.add_comments(comments) if comments else self._add_comments(instance) 988 return self.validate_expression(instance) 989 990 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 991 if expression and self._prev_comments: 992 expression.add_comments(self._prev_comments) 993 self._prev_comments = None 994 995 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 996 """ 997 Validates an Expression, making sure that all its mandatory arguments are set. 998 999 Args: 1000 expression: The expression to validate. 1001 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1002 1003 Returns: 1004 The validated expression. 
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL text spanned by the two tokens, inclusive.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Moves the token cursor forward, refreshing _curr/_next/_prev and buffered comments."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewind (or fast-forward) to an absolute token index via _advance.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback for statements parsed opaquely: keyword + the rest as a string.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT ON <kind> <target> IS <string> statements."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: degrade gracefully to an opaque command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause (actions, WHERE, GROUP BY ... SET)."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level statement dispatch: registered parsers, commands, then expressions."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; short-circuits so tokens are only consumed on match.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses CREATE statements for functions, procedures, indexes, tables, views etc."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulate properties parsed at the various syntactic locations
            # into a single Properties node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords that may precede the property name; each
        # _match_* call consumes its tokens only when present.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only pass the modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser's signature does not accept these modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property assignment (identifier or string key).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED AS <format> / STORED AS INPUTFORMAT ... OUTPUTFORMAT ..."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # Consumes an optional `=` or `AS` and wraps the following field in exp_class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parses consecutive properties until one fails; `before` selects the Teradata-style parser."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse can yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # Disambiguate VOLATILE: after CREATE/REPLACE-style tokens it's a table
        # property, otherwise it's a function stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        """Parses WITH (...) property lists and the WITH JOURNAL / [NO] DATA / isolated-loading forms."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        # Tri-state: True (ON), False (OFF), or None when unspecified.
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
        if not self._match_text_seq("BY"):
            # Not a CLUSTER BY after all — give the CLUSTER token back.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parses LOCKING <kind> [<target>] FOR|IN <lock type> [OVERRIDE] clauses."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locking has no named target; the others do.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parses LIKE <table> [INCLUDING|EXCLUDING <option>]... in CREATE TABLE."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parses RETURNS <type> / RETURNS TABLE (<schema>) / RETURNS TABLE<...>."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # Angle-bracketed struct-style signature: TABLE<col type, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        """Parses INSERT [OVERWRITE] [OR <alt>] INTO <target> ... including INSERT ... DIRECTORY."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive-style write to a filesystem directory rather than a table.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. sqlite INSERT OR REPLACE/IGNORE/...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parses ON CONFLICT ... (postgres) and ON DUPLICATE KEY ... (mysql) clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: a named constraint or a list of key values.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses Hive ROW FORMAT SERDE '<class>' or ROW FORMAT DELIMITED ... clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        # Each DELIMITED sub-clause is optional; collect only those present.
        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parses Hive LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; other LOAD forms become a Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Update:
        # `from` is a reserved word, hence the dict-splat instead of keyword args.
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parses CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parses one row of a VALUES clause, parenthesized or bare."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Parses a SELECT-like query: WITH-prefixed statements, SELECT cores,
        parenthesized subqueries (when `nested`/`table`), and VALUES clauses.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are separated by commas; a redundant WITH is also tolerated.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parses [AS] <name> [(<col>, ...)]; returns None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If nothing parsed inside the parens this wasn't a column list — rewind.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attaches trailing modifiers (joins, where, group, order, limit, ...) to `this`."""
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        # LIMIT x, y style: split the embedded offset into its own node.
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, modifiers: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parses a MATCH_RECOGNIZE(...) clause into its component sub-clauses."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured verbatim: scan tokens while balancing parens,
            # then slice the raw SQL between the first and last pattern token.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # LATERAL over a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] =
self.expression( 2163 exp.TableAlias, this=table, columns=columns 2164 ) 2165 elif isinstance(this, exp.Subquery) and this.alias: 2166 # Ensures parity between the Subquery's and the Lateral's "alias" args 2167 table_alias = this.args["alias"].copy() 2168 else: 2169 table_alias = self._parse_table_alias() 2170 2171 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2172 2173 def _parse_join_parts( 2174 self, 2175 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2176 return ( 2177 self._match_set(self.JOIN_METHODS) and self._prev, 2178 self._match_set(self.JOIN_SIDES) and self._prev, 2179 self._match_set(self.JOIN_KINDS) and self._prev, 2180 ) 2181 2182 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2183 if self._match(TokenType.COMMA): 2184 return self.expression(exp.Join, this=self._parse_table()) 2185 2186 index = self._index 2187 method, side, kind = self._parse_join_parts() 2188 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2189 join = self._match(TokenType.JOIN) 2190 2191 if not skip_join_token and not join: 2192 self._retreat(index) 2193 kind = None 2194 method = None 2195 side = None 2196 2197 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2198 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2199 2200 if not skip_join_token and not join and not outer_apply and not cross_apply: 2201 return None 2202 2203 if outer_apply: 2204 side = Token(TokenType.LEFT, "LEFT") 2205 2206 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2207 2208 if method: 2209 kwargs["method"] = method.text 2210 if side: 2211 kwargs["side"] = side.text 2212 if kind: 2213 kwargs["kind"] = kind.text 2214 if hint: 2215 kwargs["hint"] = hint 2216 2217 if self._match(TokenType.ON): 2218 kwargs["on"] = self._parse_conjunction() 2219 elif self._match(TokenType.USING): 2220 kwargs["using"] = self._parse_wrapped_id_vars() 2221 
2222 return self.expression(exp.Join, **kwargs) 2223 2224 def _parse_index( 2225 self, 2226 index: t.Optional[exp.Expression] = None, 2227 ) -> t.Optional[exp.Index]: 2228 if index: 2229 unique = None 2230 primary = None 2231 amp = None 2232 2233 self._match(TokenType.ON) 2234 self._match(TokenType.TABLE) # hive 2235 table = self._parse_table_parts(schema=True) 2236 else: 2237 unique = self._match(TokenType.UNIQUE) 2238 primary = self._match_text_seq("PRIMARY") 2239 amp = self._match_text_seq("AMP") 2240 2241 if not self._match(TokenType.INDEX): 2242 return None 2243 2244 index = self._parse_id_var() 2245 table = None 2246 2247 using = self._parse_field() if self._match(TokenType.USING) else None 2248 2249 if self._match(TokenType.L_PAREN, advance=False): 2250 columns = self._parse_wrapped_csv(self._parse_ordered) 2251 else: 2252 columns = None 2253 2254 return self.expression( 2255 exp.Index, 2256 this=index, 2257 table=table, 2258 using=using, 2259 columns=columns, 2260 unique=unique, 2261 primary=primary, 2262 amp=amp, 2263 partition_by=self._parse_partition_by(), 2264 ) 2265 2266 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2267 return ( 2268 (not schema and self._parse_function(optional_parens=False)) 2269 or self._parse_id_var(any_token=False) 2270 or self._parse_string_as_identifier() 2271 or self._parse_placeholder() 2272 ) 2273 2274 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2275 catalog = None 2276 db = None 2277 table = self._parse_table_part(schema=schema) 2278 2279 while self._match(TokenType.DOT): 2280 if catalog: 2281 # This allows nesting the table in arbitrarily many dot expressions if needed 2282 table = self.expression( 2283 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2284 ) 2285 else: 2286 catalog = db 2287 db = table 2288 table = self._parse_table_part(schema=schema) 2289 2290 if not table: 2291 self.raise_error(f"Expected table name but got {self._curr}") 2292 
2293 return self.expression( 2294 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2295 ) 2296 2297 def _parse_table( 2298 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2299 ) -> t.Optional[exp.Expression]: 2300 lateral = self._parse_lateral() 2301 if lateral: 2302 return lateral 2303 2304 unnest = self._parse_unnest() 2305 if unnest: 2306 return unnest 2307 2308 values = self._parse_derived_table_values() 2309 if values: 2310 return values 2311 2312 subquery = self._parse_select(table=True) 2313 if subquery: 2314 if not subquery.args.get("pivots"): 2315 subquery.set("pivots", self._parse_pivots()) 2316 return subquery 2317 2318 this: exp.Expression = self._parse_table_parts(schema=schema) 2319 2320 if schema: 2321 return self._parse_schema(this=this) 2322 2323 if self.ALIAS_POST_TABLESAMPLE: 2324 table_sample = self._parse_table_sample() 2325 2326 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2327 if alias: 2328 this.set("alias", alias) 2329 2330 if not this.args.get("pivots"): 2331 this.set("pivots", self._parse_pivots()) 2332 2333 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2334 this.set( 2335 "hints", 2336 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2337 ) 2338 self._match_r_paren() 2339 2340 if not self.ALIAS_POST_TABLESAMPLE: 2341 table_sample = self._parse_table_sample() 2342 2343 if table_sample: 2344 table_sample.set("this", this) 2345 this = table_sample 2346 2347 return this 2348 2349 def _parse_unnest(self) -> t.Optional[exp.Unnest]: 2350 if not self._match(TokenType.UNNEST): 2351 return None 2352 2353 expressions = self._parse_wrapped_csv(self._parse_type) 2354 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2355 alias = self._parse_table_alias() 2356 2357 if alias and self.UNNEST_COLUMN_ONLY: 2358 if alias.args.get("columns"): 2359 self.raise_error("Unexpected extra column 
alias in unnest.") 2360 2361 alias.set("columns", [alias.this]) 2362 alias.set("this", None) 2363 2364 offset = None 2365 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2366 self._match(TokenType.ALIAS) 2367 offset = self._parse_id_var() or exp.to_identifier("offset") 2368 2369 return self.expression( 2370 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2371 ) 2372 2373 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2374 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2375 if not is_derived and not self._match(TokenType.VALUES): 2376 return None 2377 2378 expressions = self._parse_csv(self._parse_value) 2379 alias = self._parse_table_alias() 2380 2381 if is_derived: 2382 self._match_r_paren() 2383 2384 return self.expression( 2385 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2386 ) 2387 2388 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2389 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2390 as_modifier and self._match_text_seq("USING", "SAMPLE") 2391 ): 2392 return None 2393 2394 bucket_numerator = None 2395 bucket_denominator = None 2396 bucket_field = None 2397 percent = None 2398 rows = None 2399 size = None 2400 seed = None 2401 2402 kind = ( 2403 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2404 ) 2405 method = self._parse_var(tokens=(TokenType.ROW,)) 2406 2407 self._match(TokenType.L_PAREN) 2408 2409 num = self._parse_number() 2410 2411 if self._match_text_seq("BUCKET"): 2412 bucket_numerator = self._parse_number() 2413 self._match_text_seq("OUT", "OF") 2414 bucket_denominator = bucket_denominator = self._parse_number() 2415 self._match(TokenType.ON) 2416 bucket_field = self._parse_field() 2417 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2418 percent = num 2419 elif self._match(TokenType.ROWS): 2420 rows = num 2421 else: 2422 size = num 
2423 2424 self._match(TokenType.R_PAREN) 2425 2426 if self._match(TokenType.L_PAREN): 2427 method = self._parse_var() 2428 seed = self._match(TokenType.COMMA) and self._parse_number() 2429 self._match_r_paren() 2430 elif self._match_texts(("SEED", "REPEATABLE")): 2431 seed = self._parse_wrapped(self._parse_number) 2432 2433 return self.expression( 2434 exp.TableSample, 2435 method=method, 2436 bucket_numerator=bucket_numerator, 2437 bucket_denominator=bucket_denominator, 2438 bucket_field=bucket_field, 2439 percent=percent, 2440 rows=rows, 2441 size=size, 2442 seed=seed, 2443 kind=kind, 2444 ) 2445 2446 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2447 return list(iter(self._parse_pivot, None)) 2448 2449 # https://duckdb.org/docs/sql/statements/pivot 2450 def _parse_simplified_pivot(self) -> exp.Pivot: 2451 def _parse_on() -> t.Optional[exp.Expression]: 2452 this = self._parse_bitwise() 2453 return self._parse_in(this) if self._match(TokenType.IN) else this 2454 2455 this = self._parse_table() 2456 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2457 using = self._match(TokenType.USING) and self._parse_csv( 2458 lambda: self._parse_alias(self._parse_function()) 2459 ) 2460 group = self._parse_group() 2461 return self.expression( 2462 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2463 ) 2464 2465 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2466 index = self._index 2467 2468 if self._match(TokenType.PIVOT): 2469 unpivot = False 2470 elif self._match(TokenType.UNPIVOT): 2471 unpivot = True 2472 else: 2473 return None 2474 2475 expressions = [] 2476 field = None 2477 2478 if not self._match(TokenType.L_PAREN): 2479 self._retreat(index) 2480 return None 2481 2482 if unpivot: 2483 expressions = self._parse_csv(self._parse_column) 2484 else: 2485 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2486 2487 if not expressions: 2488 self.raise_error("Failed to parse PIVOT's 
aggregation list") 2489 2490 if not self._match(TokenType.FOR): 2491 self.raise_error("Expecting FOR") 2492 2493 value = self._parse_column() 2494 2495 if not self._match(TokenType.IN): 2496 self.raise_error("Expecting IN") 2497 2498 field = self._parse_in(value, alias=True) 2499 2500 self._match_r_paren() 2501 2502 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2503 2504 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2505 pivot.set("alias", self._parse_table_alias()) 2506 2507 if not unpivot: 2508 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2509 2510 columns: t.List[exp.Expression] = [] 2511 for fld in pivot.args["field"].expressions: 2512 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2513 for name in names: 2514 if self.PREFIXED_PIVOT_COLUMNS: 2515 name = f"{name}_{field_name}" if name else field_name 2516 else: 2517 name = f"{field_name}_{name}" if name else field_name 2518 2519 columns.append(exp.to_identifier(name)) 2520 2521 pivot.set("columns", columns) 2522 2523 return pivot 2524 2525 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2526 return [agg.alias for agg in aggregations] 2527 2528 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2529 if not skip_where_token and not self._match(TokenType.WHERE): 2530 return None 2531 2532 return self.expression( 2533 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2534 ) 2535 2536 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2537 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2538 return None 2539 2540 elements = defaultdict(list) 2541 2542 while True: 2543 expressions = self._parse_csv(self._parse_conjunction) 2544 if expressions: 2545 elements["expressions"].extend(expressions) 2546 2547 grouping_sets = self._parse_grouping_sets() 
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # `WITH ROLLUP` / `WITH CUBE` take no column list; the bare
            # forms are followed by a wrapped column list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Keep looping while any grouping construct was consumed this pass.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse `GROUPING SETS (...)`; returns None if not present."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple of columns or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; returns None if HAVING isn't present."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause; returns None if QUALIFY isn't present."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause; returns `this` unchanged if ORDER BY is absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], *texts: str) -> t.Optional[E]:
        """Parse an ORDER-BY-like clause introduced by `texts` (e.g. SORT BY) into exp_class."""
        if not self._match_text_seq(*texts):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ordering term: expr [ASC|DESC] [NULLS FIRST|LAST].

        When null ordering is implicit, normalize nulls_first according to the
        dialect's NULL_ORDERING setting.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP (including `LIMIT offset, count`) or FETCH FIRST/NEXT.

        Returns `this` unchanged when neither construct is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style `LIMIT offset, count`.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse `OFFSET n [ROW|ROWS]`; returns `this` unchanged if OFFSET is absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait semantics: True = NOWAIT, False = SKIP LOCKED, expr = WAIT n.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT chains onto `this`."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-connected expressions."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level binary operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level binary operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: [NOT] BETWEEN/IN/LIKE..., ISNULL/NOTNULL, IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate we recognize - rewind past IS (and NOT).
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the tail of an IN predicate: UNNEST(...), (list|subquery), or a field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse the tail of a BETWEEN predicate: `low AND high`."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)
2818 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2819 if not self._match(TokenType.ESCAPE): 2820 return this 2821 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2822 2823 def _parse_interval(self) -> t.Optional[exp.Interval]: 2824 if not self._match(TokenType.INTERVAL): 2825 return None 2826 2827 this = self._parse_primary() or self._parse_term() 2828 unit = self._parse_function() or self._parse_var() 2829 2830 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2831 # each INTERVAL expression into this canonical form so it's easy to transpile 2832 if this and this.is_number: 2833 this = exp.Literal.string(this.name) 2834 elif this and this.is_string: 2835 parts = this.name.split() 2836 2837 if len(parts) == 2: 2838 if unit: 2839 # this is not actually a unit, it's something else 2840 unit = None 2841 self._retreat(self._index - 1) 2842 else: 2843 this = exp.Literal.string(parts[0]) 2844 unit = self.expression(exp.Var, this=parts[1]) 2845 2846 return self.expression(exp.Interval, this=this, unit=unit) 2847 2848 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2849 this = self._parse_term() 2850 2851 while True: 2852 if self._match_set(self.BITWISE): 2853 this = self.expression( 2854 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 2855 ) 2856 elif self._match_pair(TokenType.LT, TokenType.LT): 2857 this = self.expression( 2858 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2859 ) 2860 elif self._match_pair(TokenType.GT, TokenType.GT): 2861 this = self.expression( 2862 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2863 ) 2864 else: 2865 break 2866 2867 return this 2868 2869 def _parse_term(self) -> t.Optional[exp.Expression]: 2870 return self._parse_tokens(self._parse_factor, self.TERM) 2871 2872 def _parse_factor(self) -> t.Optional[exp.Expression]: 2873 return 
self._parse_tokens(self._parse_unary, self.FACTOR) 2874 2875 def _parse_unary(self) -> t.Optional[exp.Expression]: 2876 if self._match_set(self.UNARY_PARSERS): 2877 return self.UNARY_PARSERS[self._prev.token_type](self) 2878 return self._parse_at_time_zone(self._parse_type()) 2879 2880 def _parse_type(self) -> t.Optional[exp.Expression]: 2881 interval = self._parse_interval() 2882 if interval: 2883 return interval 2884 2885 index = self._index 2886 data_type = self._parse_types(check_func=True) 2887 this = self._parse_column() 2888 2889 if data_type: 2890 if isinstance(this, exp.Literal): 2891 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2892 if parser: 2893 return parser(self, this, data_type) 2894 return self.expression(exp.Cast, this=this, to=data_type) 2895 if not data_type.expressions: 2896 self._retreat(index) 2897 return self._parse_column() 2898 return self._parse_column_ops(data_type) 2899 2900 return this 2901 2902 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 2903 this = self._parse_type() 2904 if not this: 2905 return None 2906 2907 return self.expression( 2908 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 2909 ) 2910 2911 def _parse_types( 2912 self, check_func: bool = False, schema: bool = False 2913 ) -> t.Optional[exp.Expression]: 2914 index = self._index 2915 2916 prefix = self._match_text_seq("SYSUDTLIB", ".") 2917 2918 if not self._match_set(self.TYPE_TOKENS): 2919 return None 2920 2921 type_token = self._prev.token_type 2922 2923 if type_token == TokenType.PSEUDO_TYPE: 2924 return self.expression(exp.PseudoType, this=self._prev.text) 2925 2926 nested = type_token in self.NESTED_TYPE_TOKENS 2927 is_struct = type_token == TokenType.STRUCT 2928 expressions = None 2929 maybe_func = False 2930 2931 if self._match(TokenType.L_PAREN): 2932 if is_struct: 2933 expressions = self._parse_csv(self._parse_struct_types) 2934 elif nested: 2935 expressions = self._parse_csv( 2936 lambda: 
self._parse_types(check_func=check_func, schema=schema) 2937 ) 2938 else: 2939 expressions = self._parse_csv(self._parse_type_size) 2940 2941 if not expressions or not self._match(TokenType.R_PAREN): 2942 self._retreat(index) 2943 return None 2944 2945 maybe_func = True 2946 2947 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2948 this = exp.DataType( 2949 this=exp.DataType.Type.ARRAY, 2950 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2951 nested=True, 2952 ) 2953 2954 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2955 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 2956 2957 return this 2958 2959 if self._match(TokenType.L_BRACKET): 2960 self._retreat(index) 2961 return None 2962 2963 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2964 if nested and self._match(TokenType.LT): 2965 if is_struct: 2966 expressions = self._parse_csv(self._parse_struct_types) 2967 else: 2968 expressions = self._parse_csv( 2969 lambda: self._parse_types(check_func=check_func, schema=schema) 2970 ) 2971 2972 if not self._match(TokenType.GT): 2973 self.raise_error("Expecting >") 2974 2975 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2976 values = self._parse_csv(self._parse_conjunction) 2977 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2978 2979 value: t.Optional[exp.Expression] = None 2980 if type_token in self.TIMESTAMPS: 2981 if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ: 2982 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2983 elif ( 2984 self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE") 2985 or type_token == TokenType.TIMESTAMPLTZ 2986 ): 2987 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2988 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 2989 if type_token == TokenType.TIME: 2990 value = 
exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2991 else: 2992 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2993 2994 maybe_func = maybe_func and value is None 2995 2996 if value is None: 2997 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2998 elif type_token == TokenType.INTERVAL: 2999 unit = self._parse_var() 3000 3001 if not unit: 3002 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3003 else: 3004 value = self.expression(exp.Interval, unit=unit) 3005 3006 if maybe_func and check_func: 3007 index2 = self._index 3008 peek = self._parse_string() 3009 3010 if not peek: 3011 self._retreat(index) 3012 return None 3013 3014 self._retreat(index2) 3015 3016 if value: 3017 return value 3018 3019 return exp.DataType( 3020 this=exp.DataType.Type[type_token.value.upper()], 3021 expressions=expressions, 3022 nested=nested, 3023 values=values, 3024 prefix=prefix, 3025 ) 3026 3027 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3028 this = self._parse_type() or self._parse_id_var() 3029 self._match(TokenType.COLON) 3030 return self._parse_column_def(this) 3031 3032 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3033 if not self._match_text_seq("AT", "TIME", "ZONE"): 3034 return this 3035 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3036 3037 def _parse_column(self) -> t.Optional[exp.Expression]: 3038 this = self._parse_field() 3039 if isinstance(this, exp.Identifier): 3040 this = self.expression(exp.Column, this=this) 3041 elif not this: 3042 return self._parse_bracket(this) 3043 return self._parse_column_ops(this) 3044 3045 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3046 this = self._parse_bracket(this) 3047 3048 while self._match_set(self.COLUMN_OPERATORS): 3049 op_token = self._prev.token_type 3050 op = 
self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers one slot left: table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, implicit string concatenation,
        a leading-dot number, or a parenthesized expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # .5 -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching to the registered per-function parsers."""
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No '(' follows, so only parenless builtins like CURRENT_DATE qualify.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if
subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                # Unknown function name - keep it as an anonymous call.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter in a function definition: a name plus optional type."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); fall back to a plain identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> expr) or fall back to DISTINCT / a plain expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all - rewind and parse a regular expression instead.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to `this`."""
        index = self._index

        if not self.errors:
            # Speculatively try a nested SELECT first; any errors are discarded.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type and trailing constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if
self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with one value or a wrapped value list."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY (...) and its options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>): a computed column, not an identity.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the constraint following NOT: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally CONSTRAINT-named) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint, named via CONSTRAINT or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or
self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint using the registered CONSTRAINT_PARSERS."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint with an optional column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key constraint options (ON <event> <action>, DEFERRABLE, ...) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; `match` controls whether the keyword is required."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint with its REFERENCES target and ON actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single keyword is taken verbatim (e.g. RESTRICT, CASCADE).
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, either as a column constraint or a table-level key."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def
_parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3533 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3534 return this 3535 3536 bracket_kind = self._prev.token_type 3537 3538 if self._match(TokenType.COLON): 3539 expressions: t.List[t.Optional[exp.Expression]] = [ 3540 self.expression(exp.Slice, expression=self._parse_conjunction()) 3541 ] 3542 else: 3543 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3544 3545 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3546 if bracket_kind == TokenType.L_BRACE: 3547 this = self.expression(exp.Struct, expressions=expressions) 3548 elif not this or this.name.upper() == "ARRAY": 3549 this = self.expression(exp.Array, expressions=expressions) 3550 else: 3551 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3552 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3553 3554 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3555 self.raise_error("Expected ]") 3556 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3557 self.raise_error("Expected }") 3558 3559 self._add_comments(this) 3560 return self._parse_bracket(this) 3561 3562 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3563 if self._match(TokenType.COLON): 3564 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3565 return this 3566 3567 def _parse_case(self) -> t.Optional[exp.Expression]: 3568 ifs = [] 3569 default = None 3570 3571 expression = self._parse_conjunction() 3572 3573 while self._match(TokenType.WHEN): 3574 this = self._parse_conjunction() 3575 self._match(TokenType.THEN) 3576 then = self._parse_conjunction() 3577 ifs.append(self.expression(exp.If, this=this, true=then)) 3578 3579 if self._match(TokenType.ELSE): 3580 default = self._parse_conjunction() 3581 3582 if not 
self._match(TokenType.END): 3583 self.raise_error("Expected END after CASE", self._prev) 3584 3585 return self._parse_window( 3586 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3587 ) 3588 3589 def _parse_if(self) -> t.Optional[exp.Expression]: 3590 if self._match(TokenType.L_PAREN): 3591 args = self._parse_csv(self._parse_conjunction) 3592 this = self.validate_expression(exp.If.from_arg_list(args), args) 3593 self._match_r_paren() 3594 else: 3595 index = self._index - 1 3596 condition = self._parse_conjunction() 3597 3598 if not condition: 3599 self._retreat(index) 3600 return None 3601 3602 self._match(TokenType.THEN) 3603 true = self._parse_conjunction() 3604 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3605 self._match(TokenType.END) 3606 this = self.expression(exp.If, this=condition, true=true, false=false) 3607 3608 return self._parse_window(this) 3609 3610 def _parse_extract(self) -> exp.Extract: 3611 this = self._parse_function() or self._parse_var() or self._parse_type() 3612 3613 if self._match(TokenType.FROM): 3614 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3615 3616 if not self._match(TokenType.COMMA): 3617 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3618 3619 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3620 3621 def _parse_cast(self, strict: bool) -> exp.Expression: 3622 this = self._parse_conjunction() 3623 3624 if not self._match(TokenType.ALIAS): 3625 if self._match(TokenType.COMMA): 3626 return self.expression( 3627 exp.CastToStrType, this=this, expression=self._parse_string() 3628 ) 3629 else: 3630 self.raise_error("Expected AS after CAST") 3631 3632 to = self._parse_types() 3633 3634 if not to: 3635 self.raise_error("Expected TYPE after CAST") 3636 elif to.this == exp.DataType.Type.CHAR: 3637 if self._match(TokenType.CHARACTER_SET): 3638 to = self.expression(exp.CharacterSet, 
this=self._parse_var_or_string())
        elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT):
            # CAST(expr AS DATE/TIMESTAMP FORMAT 'fmt') becomes STR_TO_DATE / STR_TO_TIME.
            fmt = self._parse_string()

            return self.expression(
                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                this=this,
                format=exp.Literal.string(
                    format_time(
                        fmt.this if fmt else "",
                        self.FORMAT_MAPPING or self.TIME_MAPPING,
                        self.FORMAT_TRIE or self.TIME_TRIE,
                    )
                ),
            )

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT's arguments, coalescing NULLs to '' where the dialect requires it."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [exp.func("COALESCE", arg, exp.Literal.string("")) for arg in args]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT, including the WITHIN GROUP (ORDER BY ...) form."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast node."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
3724 """ 3725 args = self._parse_csv(self._parse_conjunction) 3726 3727 if len(args) < 3: 3728 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3729 3730 expression, *expressions = args 3731 if not expression: 3732 return None 3733 3734 ifs = [] 3735 for search, result in zip(expressions[::2], expressions[1::2]): 3736 if not search or not result: 3737 return None 3738 3739 if isinstance(search, exp.Literal): 3740 ifs.append( 3741 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3742 ) 3743 elif isinstance(search, exp.Null): 3744 ifs.append( 3745 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3746 ) 3747 else: 3748 cond = exp.or_( 3749 exp.EQ(this=expression.copy(), expression=search), 3750 exp.and_( 3751 exp.Is(this=expression.copy(), expression=exp.Null()), 3752 exp.Is(this=search.copy(), expression=exp.Null()), 3753 copy=False, 3754 ), 3755 copy=False, 3756 ) 3757 ifs.append(exp.If(this=cond, true=result)) 3758 3759 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3760 3761 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3762 self._match_text_seq("KEY") 3763 key = self._parse_field() 3764 self._match(TokenType.COLON) 3765 self._match_text_seq("VALUE") 3766 value = self._parse_field() 3767 3768 if not key and not value: 3769 return None 3770 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3771 3772 def _parse_json_object(self) -> exp.JSONObject: 3773 star = self._parse_star() 3774 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3775 3776 null_handling = None 3777 if self._match_text_seq("NULL", "ON", "NULL"): 3778 null_handling = "NULL ON NULL" 3779 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3780 null_handling = "ABSENT ON NULL" 3781 3782 unique_keys = None 3783 if self._match_text_seq("WITH", "UNIQUE"): 3784 unique_keys = True 3785 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3786 unique_keys = False 3787 3788 self._match_text_seq("KEYS") 3789 3790 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3791 format_json = self._match_text_seq("FORMAT", "JSON") 3792 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3793 3794 return self.expression( 3795 exp.JSONObject, 3796 expressions=expressions, 3797 null_handling=null_handling, 3798 unique_keys=unique_keys, 3799 return_type=return_type, 3800 format_json=format_json, 3801 encoding=encoding, 3802 ) 3803 3804 def _parse_logarithm(self) -> exp.Func: 3805 # Default argument order is base, expression 3806 args = self._parse_csv(self._parse_range) 3807 3808 if len(args) > 1: 3809 if not self.LOG_BASE_FIRST: 3810 args.reverse() 3811 return exp.Log.from_arg_list(args) 3812 3813 return self.expression( 3814 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3815 ) 3816 3817 def _parse_match_against(self) -> exp.MatchAgainst: 3818 expressions = self._parse_csv(self._parse_column) 3819 3820 self._match_text_seq(")", "AGAINST", "(") 3821 3822 this = self._parse_string() 3823 3824 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3825 modifier = "IN NATURAL LANGUAGE MODE" 3826 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3827 modifier = f"{modifier} WITH QUERY EXPANSION" 3828 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3829 modifier = "IN BOOLEAN MODE" 3830 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3831 modifier = "WITH QUERY EXPANSION" 3832 else: 3833 modifier = None 3834 3835 return self.expression( 3836 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3837 ) 3838 3839 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3840 def _parse_open_json(self) -> exp.OpenJSON: 3841 this = self._parse_bitwise() 3842 path = self._match(TokenType.COMMA) and self._parse_string() 3843 3844 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause column: name, type, optional path and AS JSON flag.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style calls, normalizing needle/haystack argument order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint function's table arguments into a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        """Parse SUBSTRING, including the Postgres FROM/FOR keyword form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        """Parse TRIM([LEADING|TRAILING|BOTH] [chars FROM] string [COLLATE ...])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): the first operand was the removal set.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW <name> AS (...) [, ...] clause."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one named window definition."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` when it is followed by IGNORE NULLS or RESPECT NULLS."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes on `this`: FILTER, WITHIN GROUP and OVER (...)."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
3944 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3945 if self._match_text_seq("WITHIN", "GROUP"): 3946 order = self._parse_wrapped(self._parse_order) 3947 this = self.expression(exp.WithinGroup, this=this, expression=order) 3948 3949 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3950 # Some dialects choose to implement and some do not. 3951 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3952 3953 # There is some code above in _parse_lambda that handles 3954 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3955 3956 # The below changes handle 3957 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3958 3959 # Oracle allows both formats 3960 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3961 # and Snowflake chose to do the same for familiarity 3962 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3963 this = self._parse_respect_or_ignore_nulls(this) 3964 3965 # bigquery select from window x AS (partition by ...) 
        # Continuation of _parse_window: `alias=True` means we are inside a
        # WINDOW clause entry (name AS (...)), so there is no OVER keyword.
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        # OVER w — a bare reference to a named window, no parenthesized spec.
        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        # Frame clause: ROWS|RANGE BETWEEN <spec> AND <spec>.
        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # One endpoint of a window frame: UNBOUNDED / CURRENT ROW / <expr>,
        # optionally followed by a side keyword (e.g. PRECEDING, FOLLOWING).
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # Optional [AS] alias after an expression; `explicit=True` requires the
        # AS keyword.  A parenthesized list produces exp.Aliases.
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        # An identifier or any token usable as one (keywords from ID_VAR_TOKENS,
        # or literally any non-reserved token when any_token=True).
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            # A string token used as an identifier is treated as quoted.
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # 'name' consumed as a quoted identifier (or None when not a string).
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        # A VAR token (or any non-reserved token / token from `tokens`) as exp.Var.
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()
    def _advance_any(self) -> t.Optional[Token]:
        # Consume and return the current token unless it is a reserved keyword.
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        # Parameter reference, optionally brace-wrapped: e.g. ${name} vs $name.
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The sub-parser declined; put the consumed token back.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # Column list for SELECT * EXCEPT (...) — parens optional.
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # Expression list for SELECT * REPLACE (...) — parens optional.
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        # Parse a `sep`-delimited list with `parse_method`, dropping None results.
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments attached to the separator go to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative binary operator chain: token type -> expression class.
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Run `parse_method` inside (...); the parens may be absent when optional.
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        # Either a full SELECT or a scalar expression (with optional alias support).
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )
    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        # SELECT as it appears inside DDL (e.g. CREATE TABLE ... AS SELECT ...).
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction:
        # BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # Each mode may span several VAR tokens, e.g. READ ONLY.
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        # COMMIT/ROLLBACK [TRANSACTION|WORK] [TO [SAVEPOINT] name] [AND [NO] CHAIN]
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        # ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST|AFTER col]
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        # Default the drop kind to COLUMN when _parse_drop didn't set one.
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        # ADD [CONSTRAINT name] CHECK (...) [ENFORCED] | FOREIGN KEY ... | PRIMARY KEY ...
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        # Try constraints first; retreat and reparse as column additions otherwise.
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        # ALTER [COLUMN] col DROP DEFAULT | SET DEFAULT expr | [SET DATA] TYPE ...
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )
    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        # DROP of partitions (Athena-style) or columns; retreat when not partitions.
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        # ALTER TABLE [IF EXISTS] name <action>; anything unrecognized
        # falls back to an opaque exp.Command.
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only accept the parse if every token was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        # MERGE INTO target USING source ON cond WHEN [NOT] MATCHED [BY SOURCE|TARGET]
        # [AND cond] THEN INSERT ... | UPDATE ... | DELETE
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # False for BY TARGET, True for BY SOURCE, None when neither appears.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        # Dialect-specific SHOW parsers first; otherwise a generic exp.Show.
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        # SET [GLOBAL|SESSION] name = value | name TO value | ... TRANSACTION ...
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment; rewind so the caller can try something else.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)
    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        # SET [GLOBAL] TRANSACTION <characteristic> [, ...]
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self) -> exp.Set | exp.Command:
        # Parse SET items; if tokens remain unconsumed, fall back to a raw command.
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        # Match the first multi-word option (e.g. "READ ONLY") and wrap it in a Var.
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        # Consume everything to EOF and keep it verbatim as an opaque Command.
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        # ClickHouse-style dictionary property: NAME(kind(key value ...)).
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        # RANGE(MIN a MAX b) — MIN defaults to 0 when only MAX is given.
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        # Walk the keyword trie over upcoming tokens; rewind if no full match.
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                break
            if result == 2:
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # True if the current token is `token_type` (consumes it when advance=True).
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # True if the current token's type is in `types` (consumes when advance=True).
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None
    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # True if the next two tokens are exactly (a, b); consumes both when advance=True.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Case-insensitive single-token text match against a collection.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        # Case-insensitive match of a sequence of token texts; rewinds fully
        # on a partial match (and when advance=False).
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
    def _replace_columns_with_dots(self, this):
        # Recursively rewrite Column/Identifier nodes into Dot/Var chains
        # (used where column syntax actually denotes dotted names).
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        # Replace column references that are really lambda parameters with the
        # bare identifier (or a Dot chain when qualified), in place.
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        # Replace the outermost Dot that wraps this column.
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    # Build a VAR_MAP expression from an alternating key/value argument list.
    # A lone star argument short-circuits to a StarMap; otherwise arguments are
    # consumed as key1, value1, key2, value2, ...
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    pairs = [(args[index], args[index + 1]) for index in range(0, len(args), 2)]

    return exp.VarMap(
        keys=exp.Array(expressions=[key for key, _ in pairs]),
        values=exp.Array(expressions=[value for _, value in pairs]),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 TYPE_TOKENS = { 114 TokenType.BIT, 115 TokenType.BOOLEAN, 116 TokenType.TINYINT, 117 TokenType.UTINYINT, 118 TokenType.SMALLINT, 119 TokenType.USMALLINT, 120 TokenType.INT, 121 TokenType.UINT, 122 TokenType.BIGINT, 123 TokenType.UBIGINT, 124 TokenType.INT128, 125 TokenType.UINT128, 
126 TokenType.INT256, 127 TokenType.UINT256, 128 TokenType.FLOAT, 129 TokenType.DOUBLE, 130 TokenType.CHAR, 131 TokenType.NCHAR, 132 TokenType.VARCHAR, 133 TokenType.NVARCHAR, 134 TokenType.TEXT, 135 TokenType.MEDIUMTEXT, 136 TokenType.LONGTEXT, 137 TokenType.MEDIUMBLOB, 138 TokenType.LONGBLOB, 139 TokenType.BINARY, 140 TokenType.VARBINARY, 141 TokenType.JSON, 142 TokenType.JSONB, 143 TokenType.INTERVAL, 144 TokenType.TIME, 145 TokenType.TIMESTAMP, 146 TokenType.TIMESTAMPTZ, 147 TokenType.TIMESTAMPLTZ, 148 TokenType.DATETIME, 149 TokenType.DATETIME64, 150 TokenType.DATE, 151 TokenType.INT4RANGE, 152 TokenType.INT4MULTIRANGE, 153 TokenType.INT8RANGE, 154 TokenType.INT8MULTIRANGE, 155 TokenType.NUMRANGE, 156 TokenType.NUMMULTIRANGE, 157 TokenType.TSRANGE, 158 TokenType.TSMULTIRANGE, 159 TokenType.TSTZRANGE, 160 TokenType.TSTZMULTIRANGE, 161 TokenType.DATERANGE, 162 TokenType.DATEMULTIRANGE, 163 TokenType.DECIMAL, 164 TokenType.BIGDECIMAL, 165 TokenType.UUID, 166 TokenType.GEOGRAPHY, 167 TokenType.GEOMETRY, 168 TokenType.HLLSKETCH, 169 TokenType.HSTORE, 170 TokenType.PSEUDO_TYPE, 171 TokenType.SUPER, 172 TokenType.SERIAL, 173 TokenType.SMALLSERIAL, 174 TokenType.BIGSERIAL, 175 TokenType.XML, 176 TokenType.UNIQUEIDENTIFIER, 177 TokenType.MONEY, 178 TokenType.SMALLMONEY, 179 TokenType.ROWVERSION, 180 TokenType.IMAGE, 181 TokenType.VARIANT, 182 TokenType.OBJECT, 183 TokenType.INET, 184 *NESTED_TYPE_TOKENS, 185 } 186 187 SUBQUERY_PREDICATES = { 188 TokenType.ANY: exp.Any, 189 TokenType.ALL: exp.All, 190 TokenType.EXISTS: exp.Exists, 191 TokenType.SOME: exp.Any, 192 } 193 194 RESERVED_KEYWORDS = { 195 *Tokenizer.SINGLE_TOKENS.values(), 196 TokenType.SELECT, 197 } 198 199 DB_CREATABLES = { 200 TokenType.DATABASE, 201 TokenType.SCHEMA, 202 TokenType.TABLE, 203 TokenType.VIEW, 204 TokenType.DICTIONARY, 205 } 206 207 CREATABLES = { 208 TokenType.COLUMN, 209 TokenType.FUNCTION, 210 TokenType.INDEX, 211 TokenType.PROCEDURE, 212 *DB_CREATABLES, 213 } 214 215 # Tokens that can 
represent identifiers 216 ID_VAR_TOKENS = { 217 TokenType.VAR, 218 TokenType.ANTI, 219 TokenType.APPLY, 220 TokenType.ASC, 221 TokenType.AUTO_INCREMENT, 222 TokenType.BEGIN, 223 TokenType.CACHE, 224 TokenType.CASE, 225 TokenType.COLLATE, 226 TokenType.COMMAND, 227 TokenType.COMMENT, 228 TokenType.COMMIT, 229 TokenType.CONSTRAINT, 230 TokenType.DEFAULT, 231 TokenType.DELETE, 232 TokenType.DESC, 233 TokenType.DESCRIBE, 234 TokenType.DICTIONARY, 235 TokenType.DIV, 236 TokenType.END, 237 TokenType.EXECUTE, 238 TokenType.ESCAPE, 239 TokenType.FALSE, 240 TokenType.FIRST, 241 TokenType.FILTER, 242 TokenType.FORMAT, 243 TokenType.FULL, 244 TokenType.IF, 245 TokenType.IS, 246 TokenType.ISNULL, 247 TokenType.INTERVAL, 248 TokenType.KEEP, 249 TokenType.LEFT, 250 TokenType.LOAD, 251 TokenType.MERGE, 252 TokenType.NATURAL, 253 TokenType.NEXT, 254 TokenType.OFFSET, 255 TokenType.ORDINALITY, 256 TokenType.OVERWRITE, 257 TokenType.PARTITION, 258 TokenType.PERCENT, 259 TokenType.PIVOT, 260 TokenType.PRAGMA, 261 TokenType.RANGE, 262 TokenType.REFERENCES, 263 TokenType.RIGHT, 264 TokenType.ROW, 265 TokenType.ROWS, 266 TokenType.SEMI, 267 TokenType.SET, 268 TokenType.SETTINGS, 269 TokenType.SHOW, 270 TokenType.TEMPORARY, 271 TokenType.TOP, 272 TokenType.TRUE, 273 TokenType.UNIQUE, 274 TokenType.UNPIVOT, 275 TokenType.UPDATE, 276 TokenType.VOLATILE, 277 TokenType.WINDOW, 278 *CREATABLES, 279 *SUBQUERY_PREDICATES, 280 *TYPE_TOKENS, 281 *NO_PAREN_FUNCTIONS, 282 } 283 284 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 285 286 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 287 TokenType.APPLY, 288 TokenType.ASOF, 289 TokenType.FULL, 290 TokenType.LEFT, 291 TokenType.LOCK, 292 TokenType.NATURAL, 293 TokenType.OFFSET, 294 TokenType.RIGHT, 295 TokenType.WINDOW, 296 } 297 298 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 299 300 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 301 302 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 303 304 FUNC_TOKENS = { 305 
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Token -> expression mappings for binary operators, grouped by precedence
    # tier (lowest binds loosest): logical connectives, (in)equality,
    # comparison, bitwise, additive ("term"), multiplicative ("factor").
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Time-like type tokens.
    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    # Tokens that combine two SELECT queries.
    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Dialect-specific join hints; empty in the base dialect.
    JOIN_HINTS: t.Set[str] = set()

    # Parsers for lambda forms: `args -> body` and the `name => value` kwarg form.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Operators that may directly follow a column expression. DOT is handled
    # inline by the column parser, hence its None entry.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Entry points used by parse_into: maps a target Expression type to the
    # parser method that produces it.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Statement dispatch table, keyed on a statement's leading token.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma,
            this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix (unary) operator parsers.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/primary expression parsers, keyed on the current token type.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter/placeholder parsers (?, @param, :1, :name, ...).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Infix predicates producing range/membership expressions.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property parsers, keyed on the upper-cased property keyword.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint parsers, keyed on the constraint keyword.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self:
        self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action parsers.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema without an explicit name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Functions parsed without parentheses around their arguments.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument lists need bespoke parsing.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Parsers for the optional clauses that can follow a query, keyed by the
    # arg name they populate on the query expression.
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET statement item parsers, keyed on the item's leading keyword.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that can carry query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect behavior flags; subclasses override these as needed.
    STRICT_CAST = True

    CONCAT_NULL_OUTPUTS_STRING = False  # A NULL arg in CONCAT yields NULL by default

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        """Clears all mutable parsing state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the error with the type we were attempting, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # Split the token stream on semicolons: one chunk per statement.
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the parse method didn't consume the chunk.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f" {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach explicit comments when given; otherwise adopt any comments
        # buffered from the previously consumed token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfers the buffered comments of the previous token onto the expression.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL text spanning both tokens, inclusive.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor and refreshes the _curr/_next/_prev token views.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds (or advances) the cursor to an absolute index.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the rest of the statement as an opaque command.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Not a recognized COMMENT ON target - treat it as a raw command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Not a statement keyword: parse as a bare expression or a SELECT.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full phrase matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Merges newly parsed properties into the running collection.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone =
            self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect the optional modifier keywords that may precede a property.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward truthy modifiers; parsers that reject them raise TypeError.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property assignment.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # Consumes an optional `=` or `AS` and wraps the value in exp_class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE right after CREATE/REPLACE/UNIQUE means the table property;
        # otherwise it's the function-stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        # Tri-state: True for ON, False for OFF, None when unspecified.
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
        if not self._match_text_seq("BY"):
            # Not CLUSTER BY - back out of the already-consumed CLUSTER token.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always =
self._match_text_seq("ALWAYS") 1471 manual = self._match_text_seq("MANUAL") 1472 never = self._match_text_seq("NEVER") 1473 default = self._match_text_seq("DEFAULT") 1474 1475 autotemp = None 1476 if self._match_text_seq("AUTOTEMP"): 1477 autotemp = self._parse_schema() 1478 1479 return self.expression( 1480 exp.BlockCompressionProperty, 1481 always=always, 1482 manual=manual, 1483 never=never, 1484 default=default, 1485 autotemp=autotemp, 1486 ) 1487 1488 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1489 no = self._match_text_seq("NO") 1490 concurrent = self._match_text_seq("CONCURRENT") 1491 self._match_text_seq("ISOLATED", "LOADING") 1492 for_all = self._match_text_seq("FOR", "ALL") 1493 for_insert = self._match_text_seq("FOR", "INSERT") 1494 for_none = self._match_text_seq("FOR", "NONE") 1495 return self.expression( 1496 exp.IsolatedLoadingProperty, 1497 no=no, 1498 concurrent=concurrent, 1499 for_all=for_all, 1500 for_insert=for_insert, 1501 for_none=for_none, 1502 ) 1503 1504 def _parse_locking(self) -> exp.LockingProperty: 1505 if self._match(TokenType.TABLE): 1506 kind = "TABLE" 1507 elif self._match(TokenType.VIEW): 1508 kind = "VIEW" 1509 elif self._match(TokenType.ROW): 1510 kind = "ROW" 1511 elif self._match_text_seq("DATABASE"): 1512 kind = "DATABASE" 1513 else: 1514 kind = None 1515 1516 if kind in ("DATABASE", "TABLE", "VIEW"): 1517 this = self._parse_table_parts() 1518 else: 1519 this = None 1520 1521 if self._match(TokenType.FOR): 1522 for_or_in = "FOR" 1523 elif self._match(TokenType.IN): 1524 for_or_in = "IN" 1525 else: 1526 for_or_in = None 1527 1528 if self._match_text_seq("ACCESS"): 1529 lock_type = "ACCESS" 1530 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1531 lock_type = "EXCLUSIVE" 1532 elif self._match_text_seq("SHARE"): 1533 lock_type = "SHARE" 1534 elif self._match_text_seq("READ"): 1535 lock_type = "READ" 1536 elif self._match_text_seq("WRITE"): 1537 lock_type = "WRITE" 1538 elif 
self._match_text_seq("CHECKSUM"): 1539 lock_type = "CHECKSUM" 1540 else: 1541 lock_type = None 1542 1543 override = self._match_text_seq("OVERRIDE") 1544 1545 return self.expression( 1546 exp.LockingProperty, 1547 this=this, 1548 kind=kind, 1549 for_or_in=for_or_in, 1550 lock_type=lock_type, 1551 override=override, 1552 ) 1553 1554 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1555 if self._match(TokenType.PARTITION_BY): 1556 return self._parse_csv(self._parse_conjunction) 1557 return [] 1558 1559 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1560 self._match(TokenType.EQ) 1561 return self.expression( 1562 exp.PartitionedByProperty, 1563 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1564 ) 1565 1566 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1567 if self._match_text_seq("AND", "STATISTICS"): 1568 statistics = True 1569 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1570 statistics = False 1571 else: 1572 statistics = None 1573 1574 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1575 1576 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1577 if self._match_text_seq("PRIMARY", "INDEX"): 1578 return exp.NoPrimaryIndexProperty() 1579 return None 1580 1581 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1582 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1583 return exp.OnCommitProperty() 1584 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1585 return exp.OnCommitProperty(delete=True) 1586 return None 1587 1588 def _parse_distkey(self) -> exp.DistKeyProperty: 1589 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1590 1591 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1592 table = self._parse_table(schema=True) 1593 1594 options = [] 1595 while self._match_texts(("INCLUDING", "EXCLUDING")): 1596 this = self._prev.text.upper() 1597 1598 id_var 
= self._parse_id_var() 1599 if not id_var: 1600 return None 1601 1602 options.append( 1603 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1604 ) 1605 1606 return self.expression(exp.LikeProperty, this=table, expressions=options) 1607 1608 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1609 return self.expression( 1610 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1611 ) 1612 1613 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1614 self._match(TokenType.EQ) 1615 return self.expression( 1616 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1617 ) 1618 1619 def _parse_returns(self) -> exp.ReturnsProperty: 1620 value: t.Optional[exp.Expression] 1621 is_table = self._match(TokenType.TABLE) 1622 1623 if is_table: 1624 if self._match(TokenType.LT): 1625 value = self.expression( 1626 exp.Schema, 1627 this="TABLE", 1628 expressions=self._parse_csv(self._parse_struct_types), 1629 ) 1630 if not self._match(TokenType.GT): 1631 self.raise_error("Expecting >") 1632 else: 1633 value = self._parse_schema(exp.var("TABLE")) 1634 else: 1635 value = self._parse_types() 1636 1637 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1638 1639 def _parse_describe(self) -> exp.Describe: 1640 kind = self._match_set(self.CREATABLES) and self._prev.text 1641 this = self._parse_table() 1642 return self.expression(exp.Describe, this=this, kind=kind) 1643 1644 def _parse_insert(self) -> exp.Insert: 1645 overwrite = self._match(TokenType.OVERWRITE) 1646 local = self._match_text_seq("LOCAL") 1647 alternative = None 1648 1649 if self._match_text_seq("DIRECTORY"): 1650 this: t.Optional[exp.Expression] = self.expression( 1651 exp.Directory, 1652 this=self._parse_var_or_string(), 1653 local=local, 1654 row_format=self._parse_row_format(match_row=True), 1655 ) 1656 else: 1657 if self._match(TokenType.OR): 1658 alternative = 
self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1659 1660 self._match(TokenType.INTO) 1661 self._match(TokenType.TABLE) 1662 this = self._parse_table(schema=True) 1663 1664 return self.expression( 1665 exp.Insert, 1666 this=this, 1667 exists=self._parse_exists(), 1668 partition=self._parse_partition(), 1669 expression=self._parse_ddl_select(), 1670 conflict=self._parse_on_conflict(), 1671 returning=self._parse_returning(), 1672 overwrite=overwrite, 1673 alternative=alternative, 1674 ) 1675 1676 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1677 conflict = self._match_text_seq("ON", "CONFLICT") 1678 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1679 1680 if not conflict and not duplicate: 1681 return None 1682 1683 nothing = None 1684 expressions = None 1685 key = None 1686 constraint = None 1687 1688 if conflict: 1689 if self._match_text_seq("ON", "CONSTRAINT"): 1690 constraint = self._parse_id_var() 1691 else: 1692 key = self._parse_csv(self._parse_value) 1693 1694 self._match_text_seq("DO") 1695 if self._match_text_seq("NOTHING"): 1696 nothing = True 1697 else: 1698 self._match(TokenType.UPDATE) 1699 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1700 1701 return self.expression( 1702 exp.OnConflict, 1703 duplicate=duplicate, 1704 expressions=expressions, 1705 nothing=nothing, 1706 key=key, 1707 constraint=constraint, 1708 ) 1709 1710 def _parse_returning(self) -> t.Optional[exp.Returning]: 1711 if not self._match(TokenType.RETURNING): 1712 return None 1713 1714 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1715 1716 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1717 if not self._match(TokenType.FORMAT): 1718 return None 1719 return self._parse_row_format() 1720 1721 def _parse_row_format( 1722 self, match_row: bool = False 1723 ) -> t.Optional[exp.RowFormatSerdeProperty | 
exp.RowFormatDelimitedProperty]: 1724 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1725 return None 1726 1727 if self._match_text_seq("SERDE"): 1728 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1729 1730 self._match_text_seq("DELIMITED") 1731 1732 kwargs = {} 1733 1734 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1735 kwargs["fields"] = self._parse_string() 1736 if self._match_text_seq("ESCAPED", "BY"): 1737 kwargs["escaped"] = self._parse_string() 1738 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1739 kwargs["collection_items"] = self._parse_string() 1740 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1741 kwargs["map_keys"] = self._parse_string() 1742 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1743 kwargs["lines"] = self._parse_string() 1744 if self._match_text_seq("NULL", "DEFINED", "AS"): 1745 kwargs["null"] = self._parse_string() 1746 1747 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1748 1749 def _parse_load(self) -> exp.LoadData | exp.Command: 1750 if self._match_text_seq("DATA"): 1751 local = self._match_text_seq("LOCAL") 1752 self._match_text_seq("INPATH") 1753 inpath = self._parse_string() 1754 overwrite = self._match(TokenType.OVERWRITE) 1755 self._match_pair(TokenType.INTO, TokenType.TABLE) 1756 1757 return self.expression( 1758 exp.LoadData, 1759 this=self._parse_table(schema=True), 1760 local=local, 1761 overwrite=overwrite, 1762 inpath=inpath, 1763 partition=self._parse_partition(), 1764 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1765 serde=self._match_text_seq("SERDE") and self._parse_string(), 1766 ) 1767 return self._parse_as_command(self._prev) 1768 1769 def _parse_delete(self) -> exp.Delete: 1770 self._match(TokenType.FROM) 1771 1772 return self.expression( 1773 exp.Delete, 1774 this=self._parse_table(), 1775 using=self._parse_csv(lambda: 
self._match(TokenType.USING) and self._parse_table()), 1776 where=self._parse_where(), 1777 returning=self._parse_returning(), 1778 ) 1779 1780 def _parse_update(self) -> exp.Update: 1781 return self.expression( 1782 exp.Update, 1783 **{ # type: ignore 1784 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1785 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1786 "from": self._parse_from(modifiers=True), 1787 "where": self._parse_where(), 1788 "returning": self._parse_returning(), 1789 }, 1790 ) 1791 1792 def _parse_uncache(self) -> exp.Uncache: 1793 if not self._match(TokenType.TABLE): 1794 self.raise_error("Expecting TABLE after UNCACHE") 1795 1796 return self.expression( 1797 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1798 ) 1799 1800 def _parse_cache(self) -> exp.Cache: 1801 lazy = self._match_text_seq("LAZY") 1802 self._match(TokenType.TABLE) 1803 table = self._parse_table(schema=True) 1804 1805 options = [] 1806 if self._match_text_seq("OPTIONS"): 1807 self._match_l_paren() 1808 k = self._parse_string() 1809 self._match(TokenType.EQ) 1810 v = self._parse_string() 1811 options = [k, v] 1812 self._match_r_paren() 1813 1814 self._match(TokenType.ALIAS) 1815 return self.expression( 1816 exp.Cache, 1817 this=table, 1818 lazy=lazy, 1819 options=options, 1820 expression=self._parse_select(nested=True), 1821 ) 1822 1823 def _parse_partition(self) -> t.Optional[exp.Partition]: 1824 if not self._match(TokenType.PARTITION): 1825 return None 1826 1827 return self.expression( 1828 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1829 ) 1830 1831 def _parse_value(self) -> exp.Tuple: 1832 if self._match(TokenType.L_PAREN): 1833 expressions = self._parse_csv(self._parse_conjunction) 1834 self._match_r_paren() 1835 return self.expression(exp.Tuple, expressions=expressions) 1836 1837 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: a WITH/CTE statement, SELECT proper, a
        parenthesized subquery or derived table, or a VALUES clause.

        Args:
            nested: whether a parenthesized nested select is acceptable here.
            table: whether a bare table reference is acceptable inside parens.
            parse_subquery_alias: whether to consume an alias after a subquery.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # SELECT AS STRUCT / SELECT AS VALUE -- captured as the select "kind".
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                # (FROM t ...) -- shorthand for SELECT * FROM t.
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1965 or self._parse_string_as_identifier() 1966 ) 1967 1968 index = self._index 1969 if self._match(TokenType.L_PAREN): 1970 columns = self._parse_csv(self._parse_function_parameter) 1971 self._match_r_paren() if columns else self._retreat(index) 1972 else: 1973 columns = None 1974 1975 if not alias and not columns: 1976 return None 1977 1978 return self.expression(exp.TableAlias, this=alias, columns=columns) 1979 1980 def _parse_subquery( 1981 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1982 ) -> t.Optional[exp.Subquery]: 1983 if not this: 1984 return None 1985 1986 return self.expression( 1987 exp.Subquery, 1988 this=this, 1989 pivots=self._parse_pivots(), 1990 alias=self._parse_table_alias() if parse_alias else None, 1991 ) 1992 1993 def _parse_query_modifiers( 1994 self, this: t.Optional[exp.Expression] 1995 ) -> t.Optional[exp.Expression]: 1996 if isinstance(this, self.MODIFIABLES): 1997 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1998 expression = parser(self) 1999 2000 if expression: 2001 if key == "limit": 2002 offset = expression.args.pop("offset", None) 2003 if offset: 2004 this.set("offset", exp.Offset(expression=offset)) 2005 this.set(key, expression) 2006 return this 2007 2008 def _parse_hint(self) -> t.Optional[exp.Hint]: 2009 if self._match(TokenType.HINT): 2010 hints = self._parse_csv(self._parse_function) 2011 2012 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2013 self.raise_error("Expected */ after HINT") 2014 2015 return self.expression(exp.Hint, expressions=hints) 2016 2017 return None 2018 2019 def _parse_into(self) -> t.Optional[exp.Into]: 2020 if not self._match(TokenType.INTO): 2021 return None 2022 2023 temp = self._match(TokenType.TEMPORARY) 2024 unlogged = self._match_text_seq("UNLOGGED") 2025 self._match(TokenType.TABLE) 2026 2027 return self.expression( 2028 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2029 ) 2030 2031 def _parse_from( 2032 self, 
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause into an exp.MatchRecognize node."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
        )

        # ONE ROW PER MATCH / ALL ROWS PER MATCH [...]; kept verbatim as a var.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        # AFTER MATCH SKIP <strategy>; the whole phrase is kept as a single var.
        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan raw tokens, tracking parenthesis depth, so the regex-like
            # pattern is captured verbatim rather than parsed as SQL.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                # `end` trails one token behind so the closing ) is excluded.
                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        # DEFINE var AS condition, ...
        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
self.expression( 2164 exp.TableAlias, this=table, columns=columns 2165 ) 2166 elif isinstance(this, exp.Subquery) and this.alias: 2167 # Ensures parity between the Subquery's and the Lateral's "alias" args 2168 table_alias = this.args["alias"].copy() 2169 else: 2170 table_alias = self._parse_table_alias() 2171 2172 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2173 2174 def _parse_join_parts( 2175 self, 2176 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2177 return ( 2178 self._match_set(self.JOIN_METHODS) and self._prev, 2179 self._match_set(self.JOIN_SIDES) and self._prev, 2180 self._match_set(self.JOIN_KINDS) and self._prev, 2181 ) 2182 2183 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2184 if self._match(TokenType.COMMA): 2185 return self.expression(exp.Join, this=self._parse_table()) 2186 2187 index = self._index 2188 method, side, kind = self._parse_join_parts() 2189 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2190 join = self._match(TokenType.JOIN) 2191 2192 if not skip_join_token and not join: 2193 self._retreat(index) 2194 kind = None 2195 method = None 2196 side = None 2197 2198 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2199 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2200 2201 if not skip_join_token and not join and not outer_apply and not cross_apply: 2202 return None 2203 2204 if outer_apply: 2205 side = Token(TokenType.LEFT, "LEFT") 2206 2207 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2208 2209 if method: 2210 kwargs["method"] = method.text 2211 if side: 2212 kwargs["side"] = side.text 2213 if kind: 2214 kwargs["kind"] = kind.text 2215 if hint: 2216 kwargs["hint"] = hint 2217 2218 if self._match(TokenType.ON): 2219 kwargs["on"] = self._parse_conjunction() 2220 elif self._match(TokenType.USING): 2221 kwargs["using"] = self._parse_wrapped_id_vars() 2222 
2223 return self.expression(exp.Join, **kwargs) 2224 2225 def _parse_index( 2226 self, 2227 index: t.Optional[exp.Expression] = None, 2228 ) -> t.Optional[exp.Index]: 2229 if index: 2230 unique = None 2231 primary = None 2232 amp = None 2233 2234 self._match(TokenType.ON) 2235 self._match(TokenType.TABLE) # hive 2236 table = self._parse_table_parts(schema=True) 2237 else: 2238 unique = self._match(TokenType.UNIQUE) 2239 primary = self._match_text_seq("PRIMARY") 2240 amp = self._match_text_seq("AMP") 2241 2242 if not self._match(TokenType.INDEX): 2243 return None 2244 2245 index = self._parse_id_var() 2246 table = None 2247 2248 using = self._parse_field() if self._match(TokenType.USING) else None 2249 2250 if self._match(TokenType.L_PAREN, advance=False): 2251 columns = self._parse_wrapped_csv(self._parse_ordered) 2252 else: 2253 columns = None 2254 2255 return self.expression( 2256 exp.Index, 2257 this=index, 2258 table=table, 2259 using=using, 2260 columns=columns, 2261 unique=unique, 2262 primary=primary, 2263 amp=amp, 2264 partition_by=self._parse_partition_by(), 2265 ) 2266 2267 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2268 return ( 2269 (not schema and self._parse_function(optional_parens=False)) 2270 or self._parse_id_var(any_token=False) 2271 or self._parse_string_as_identifier() 2272 or self._parse_placeholder() 2273 ) 2274 2275 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2276 catalog = None 2277 db = None 2278 table = self._parse_table_part(schema=schema) 2279 2280 while self._match(TokenType.DOT): 2281 if catalog: 2282 # This allows nesting the table in arbitrarily many dot expressions if needed 2283 table = self.expression( 2284 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2285 ) 2286 else: 2287 catalog = db 2288 db = table 2289 table = self._parse_table_part(schema=schema) 2290 2291 if not table: 2292 self.raise_error(f"Expected table name but got {self._curr}") 2293 
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or a plain table.

        Args:
            schema: whether to parse the result as a schema target (column defs allowed).
            alias_tokens: token types permitted as the table alias.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects place TABLESAMPLE before the alias (ALIAS_POST_TABLESAMPLE),
        # others after it; parse it at whichever position the dialect expects.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # WITH (hint, ...) table hints.
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node wraps the table so the sample becomes the root.
            table_sample.set("this", this)
            this = table_sample

        return this
alias in unnest.") 2361 2362 alias.set("columns", [alias.this]) 2363 alias.set("this", None) 2364 2365 offset = None 2366 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2367 self._match(TokenType.ALIAS) 2368 offset = self._parse_id_var() or exp.to_identifier("offset") 2369 2370 return self.expression( 2371 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2372 ) 2373 2374 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2375 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2376 if not is_derived and not self._match(TokenType.VALUES): 2377 return None 2378 2379 expressions = self._parse_csv(self._parse_value) 2380 alias = self._parse_table_alias() 2381 2382 if is_derived: 2383 self._match_r_paren() 2384 2385 return self.expression( 2386 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2387 ) 2388 2389 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2390 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2391 as_modifier and self._match_text_seq("USING", "SAMPLE") 2392 ): 2393 return None 2394 2395 bucket_numerator = None 2396 bucket_denominator = None 2397 bucket_field = None 2398 percent = None 2399 rows = None 2400 size = None 2401 seed = None 2402 2403 kind = ( 2404 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2405 ) 2406 method = self._parse_var(tokens=(TokenType.ROW,)) 2407 2408 self._match(TokenType.L_PAREN) 2409 2410 num = self._parse_number() 2411 2412 if self._match_text_seq("BUCKET"): 2413 bucket_numerator = self._parse_number() 2414 self._match_text_seq("OUT", "OF") 2415 bucket_denominator = bucket_denominator = self._parse_number() 2416 self._match(TokenType.ON) 2417 bucket_field = self._parse_field() 2418 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2419 percent = num 2420 elif self._match(TokenType.ROWS): 2421 rows = num 2422 else: 2423 size = num 

        self._match(TokenType.R_PAREN)

        # Optional seed: either a second parenthesized group `(method, seed)` or
        # an explicit SEED/REPEATABLE(<n>) suffix.
        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse zero or more consecutive PIVOT/UNPIVOT clauses.

        Uses the two-argument ``iter(callable, sentinel)`` form: keeps calling
        `_parse_pivot` until it returns None.
        """
        return list(iter(self._parse_pivot, None))

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT statement: table, ON list, USING list, GROUP BY."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse one standard `PIVOT(...)` / `UNPIVOT(...)` clause.

        Backtracks (via `_retreat`) when PIVOT/UNPIVOT is not followed by `(`,
        so a bare identifier named "pivot" is not swallowed.
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        # UNPIVOT lists plain columns; PIVOT lists (aliased) aggregations.
        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may take an alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names: one per (IN value, aggregation)
            # pair, joined with "_" in dialect-dependent order.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # Overridable hook: by default a pivot column is named by its aggregation's alias.
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; `skip_where_token` lets callers that already
        consumed WHERE reuse this method."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, accumulating expressions, GROUPING SETS,
        ROLLUP/CUBE and WITH TOTALS in any order until no more are found."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH may introduce ROLLUP/CUBE (e.g. `GROUP BY x WITH ROLLUP`);
            # otherwise ROLLUP/CUBE take a parenthesized column list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Stop once an iteration found nothing beyond plain expressions.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse `GROUPING SETS (...)`; returns None when the keyword is absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized tuple of columns or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause wrapping `this`; returns `this` unchanged
        when there is no ORDER BY."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], *texts: str) -> t.Optional[E]:
        """Parse a sort-like clause introduced by the word sequence `texts`
        (e.g. SORT BY / CLUSTER BY) into an `exp_class` node."""
        if not self._match_text_seq(*texts):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ORDER BY term with optional ASC/DESC and NULLS FIRST/LAST,
        normalizing implicit null ordering via the dialect's NULL_ORDERING."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When the query didn't say NULLS FIRST/LAST, infer nulls_first from the
        # dialect's default so transpilation can make it explicit when needed.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top` is True) or a FETCH FIRST/NEXT clause;
        returns `this` unchanged when neither is present."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            # MySQL-style `LIMIT offset, count`.
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause (with optional ROW/ROWS noise word)."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE /
        LOCK IN SHARE MODE, each with optional OF <tables> and wait options."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait semantics: True = NOWAIT, an expression = WAIT <n>,
            # False = SKIP LOCKED, None = unspecified.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains; recursion makes them right-nested."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    # The following methods form the operator-precedence chain:
    # expression > conjunction > equality > comparison > range > bitwise > ...
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ...), optionally
        negated with NOT, plus ISNULL/NOTNULL shorthands and IS predicates."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM, NULL or a boolean.

        Retreats to before IS when nothing valid follows, returning None so the
        caller keeps the expression it already has.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: an UNNEST, a parenthesized list or
        subquery, or a bare field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery goes into the `query` arg, anything else into
            # the plain `expressions` list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse `BETWEEN low AND high` (the BETWEEN token was already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE clause when one follows (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing it to the `INTERVAL '<n>' <unit>` form."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # Split a combined literal like '5 day' into value + unit.
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as two tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        # Additive-level operators.
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        # Multiplicative-level operators.
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary operators, falling through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse either an interval, a cast-style `<type> <literal>` pair, or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # `<type> '<literal>'` is a typed literal, e.g. DATE '2020-01-01'.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by something else: reparse as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse a type-size argument like `VARCHAR(10)`'s `10` or `10 CHAR`."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type.

        Backtracks via `_retreat(index)` whenever what looked like a type turns
        out not to be one (e.g. a function call when `check_func` is True).
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # `<name>(...)` could still be a function call; decided below.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # `<type>[]` (possibly repeated) builds nested ARRAY types.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # `<type>[` without a closing bracket is an index access, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Resolve WITH/WITHOUT TIME ZONE modifiers to concrete timestamp types.
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Disambiguate `TYPE(...)` from a function call: only a following
            # string literal confirms it is a typed literal / type.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: `name [:] type ...` as a column def."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE when that sequence follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference (a field promoted to Column) plus any
        trailing operators (dots, casts, brackets)."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse postfix column operators: `::type` casts, dot-paths (growing
        table/db/catalog qualifiers), JSON arrows, and bracket subscripts."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers one level: this.this becomes the table, etc.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, adjacent-string concat,
        `.N` number shorthand, or a parenthesized expression/subquery/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> CONCAT).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching through NO_PAREN_FUNCTION_PARSERS,
        NO_PAREN_FUNCTIONS, FUNCTION_PARSERS or the FUNCTIONS builder table."""
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parens: only paren-less builtins like CURRENT_DATE qualify.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if 
subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3183 this = self.expression(subquery_predicate, this=self._parse_select()) 3184 self._match_r_paren() 3185 return this 3186 3187 if functions is None: 3188 functions = self.FUNCTIONS 3189 3190 function = functions.get(upper) 3191 3192 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3193 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3194 3195 if function and not anonymous: 3196 this = self.validate_expression(function(args), args) 3197 else: 3198 this = self.expression(exp.Anonymous, this=this, expressions=args) 3199 3200 self._match_r_paren(this) 3201 return self._parse_window(this) 3202 3203 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3204 return self._parse_column_def(self._parse_id_var()) 3205 3206 def _parse_user_defined_function( 3207 self, kind: t.Optional[TokenType] = None 3208 ) -> t.Optional[exp.Expression]: 3209 this = self._parse_id_var() 3210 3211 while self._match(TokenType.DOT): 3212 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3213 3214 if not self._match(TokenType.L_PAREN): 3215 return this 3216 3217 expressions = self._parse_csv(self._parse_function_parameter) 3218 self._match_r_paren() 3219 return self.expression( 3220 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3221 ) 3222 3223 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3224 literal = self._parse_primary() 3225 if literal: 3226 return self.expression(exp.Introducer, this=token.text, expression=literal) 3227 3228 return self.expression(exp.Identifier, this=token.text) 3229 3230 def _parse_session_parameter(self) -> exp.SessionParameter: 3231 kind = None 3232 this = self._parse_id_var() or self._parse_primary() 3233 3234 if this and self._match(TokenType.DOT): 3235 kind = this.name 3236 this = self._parse_var() or self._parse_primary() 3237 3238 return 
self.expression(exp.SessionParameter, this=this, kind=kind) 3239 3240 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3241 index = self._index 3242 3243 if self._match(TokenType.L_PAREN): 3244 expressions = self._parse_csv(self._parse_id_var) 3245 3246 if not self._match(TokenType.R_PAREN): 3247 self._retreat(index) 3248 else: 3249 expressions = [self._parse_id_var()] 3250 3251 if self._match_set(self.LAMBDAS): 3252 return self.LAMBDAS[self._prev.token_type](self, expressions) 3253 3254 self._retreat(index) 3255 3256 this: t.Optional[exp.Expression] 3257 3258 if self._match(TokenType.DISTINCT): 3259 this = self.expression( 3260 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3261 ) 3262 else: 3263 this = self._parse_select_or_expression(alias=alias) 3264 3265 if isinstance(this, exp.EQ): 3266 left = this.this 3267 if isinstance(left, exp.Column): 3268 left.replace(exp.var(left.text("this"))) 3269 3270 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3271 3272 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3273 index = self._index 3274 3275 if not self.errors: 3276 try: 3277 if self._parse_select(nested=True): 3278 return this 3279 except ParseError: 3280 pass 3281 finally: 3282 self.errors.clear() 3283 self._retreat(index) 3284 3285 if not self._match(TokenType.L_PAREN): 3286 return this 3287 3288 args = self._parse_csv( 3289 lambda: self._parse_constraint() 3290 or self._parse_column_def(self._parse_field(any_token=True)) 3291 ) 3292 3293 self._match_r_paren() 3294 return self.expression(exp.Schema, this=this, expressions=args) 3295 3296 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3297 # column defs are not really columns, they're identifiers 3298 if isinstance(this, exp.Column): 3299 this = this.this 3300 3301 kind = self._parse_types(schema=True) 3302 3303 if 
self._match_text_seq("FOR", "ORDINALITY"): 3304 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3305 3306 constraints = [] 3307 while True: 3308 constraint = self._parse_column_constraint() 3309 if not constraint: 3310 break 3311 constraints.append(constraint) 3312 3313 if not kind and not constraints: 3314 return this 3315 3316 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3317 3318 def _parse_auto_increment( 3319 self, 3320 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3321 start = None 3322 increment = None 3323 3324 if self._match(TokenType.L_PAREN, advance=False): 3325 args = self._parse_wrapped_csv(self._parse_bitwise) 3326 start = seq_get(args, 0) 3327 increment = seq_get(args, 1) 3328 elif self._match_text_seq("START"): 3329 start = self._parse_bitwise() 3330 self._match_text_seq("INCREMENT") 3331 increment = self._parse_bitwise() 3332 3333 if start and increment: 3334 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3335 3336 return exp.AutoIncrementColumnConstraint() 3337 3338 def _parse_compress(self) -> exp.CompressColumnConstraint: 3339 if self._match(TokenType.L_PAREN, advance=False): 3340 return self.expression( 3341 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3342 ) 3343 3344 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3345 3346 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3347 if self._match_text_seq("BY", "DEFAULT"): 3348 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3349 this = self.expression( 3350 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3351 ) 3352 else: 3353 self._match_text_seq("ALWAYS") 3354 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3355 3356 self._match(TokenType.ALIAS) 3357 identity = self._match_text_seq("IDENTITY") 3358 3359 if 
self._match(TokenType.L_PAREN): 3360 if self._match_text_seq("START", "WITH"): 3361 this.set("start", self._parse_bitwise()) 3362 if self._match_text_seq("INCREMENT", "BY"): 3363 this.set("increment", self._parse_bitwise()) 3364 if self._match_text_seq("MINVALUE"): 3365 this.set("minvalue", self._parse_bitwise()) 3366 if self._match_text_seq("MAXVALUE"): 3367 this.set("maxvalue", self._parse_bitwise()) 3368 3369 if self._match_text_seq("CYCLE"): 3370 this.set("cycle", True) 3371 elif self._match_text_seq("NO", "CYCLE"): 3372 this.set("cycle", False) 3373 3374 if not identity: 3375 this.set("expression", self._parse_bitwise()) 3376 3377 self._match_r_paren() 3378 3379 return this 3380 3381 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3382 self._match_text_seq("LENGTH") 3383 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3384 3385 def _parse_not_constraint( 3386 self, 3387 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3388 if self._match_text_seq("NULL"): 3389 return self.expression(exp.NotNullColumnConstraint) 3390 if self._match_text_seq("CASESPECIFIC"): 3391 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3392 return None 3393 3394 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3395 if self._match(TokenType.CONSTRAINT): 3396 this = self._parse_id_var() 3397 else: 3398 this = None 3399 3400 if self._match_texts(self.CONSTRAINT_PARSERS): 3401 return self.expression( 3402 exp.ColumnConstraint, 3403 this=this, 3404 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3405 ) 3406 3407 return this 3408 3409 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3410 if not self._match(TokenType.CONSTRAINT): 3411 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3412 3413 this = self._parse_id_var() 3414 expressions = [] 3415 3416 while True: 3417 constraint = self._parse_unnamed_constraint() or 
self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint that has no CONSTRAINT <name> prefix.

        `constraints` restricts which keywords are accepted; it defaults to
        all of CONSTRAINT_PARSERS.
        """
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(columns)]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as raw strings.

        Recognizes ON <token> {NO ACTION | CASCADE | SET NULL | SET DEFAULT},
        NOT ENFORCED, DEFERRABLE, INITIALLY DEFERRED, NORELY and MATCH FULL;
        stops at the first unrecognized token.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token right after ON (e.g. DELETE / UPDATE) is kept verbatim.
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause: target table, optional column list, then options."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON {DELETE | UPDATE} <action>]*."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token (e.g. CASCADE / RESTRICT) is taken as-is.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as either a column constraint or a table-level key with columns."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # No parenthesized column list (and not in properties): column-level constraint.
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def 
_parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] / {...} after `this`: an index/slice, an array literal, or a struct."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading colon, e.g. x[:2] — a slice with no start.
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Plain subscript: normalize indexes by the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Brackets can be chained, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice if a colon follows (e.g. x[1:2])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END, then any window trailer."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either function-style IF(...) or keyword-style IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all — back off to before the IF token.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>), also accepting a comma as the separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type>); `strict` selects Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Comma form: cast to a type given as a string argument.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, 
this=self._parse_var_or_string())
        elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT):
            # CAST(... AS <temporal> FORMAT '...') becomes StrToDate/StrToTime.
            fmt = self._parse_string()

            return self.expression(
                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                this=this,
                format=exp.Literal.string(
                    format_time(
                        fmt.this if fmt else "",
                        self.FORMAT_MAPPING or self.TIME_MAPPING,
                        self.FORMAT_TRIE or self.TIME_TRIE,
                    )
                ),
            )

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT arguments, coalescing NULLs to '' when the dialect requires it."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [exp.func("COALESCE", arg, exp.Literal.string("")) for arg in args]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style calls into a GroupConcat node."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>) into a cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: must also match when both sides are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # A trailing unpaired argument is the CASE default.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one [KEY] <key> [: | VALUE] <value> pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) with NULL-handling, key-uniqueness and RETURNING options."""
        star = self._parse_star()
        expressions = [star] if star else self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif 
self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG(...), normalizing argument order and the one-argument default per dialect."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        # Single argument: dialects differ on whether LOG means LN.
        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH (columns) AGAINST (expr [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse OPENJSON(<expr> [, path]) [WITH (column definitions)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause column: <name> <type> [path] [AS JSON].
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style calls; `haystack_first` flips comma-form argument order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(<substr> IN <string>) form.
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list into a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        """Parse SUBSTRING, including the FROM/FOR keyword form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            this = self._parse_bitwise()
        else:
            # No FROM/comma: the first expression was the trim target itself.
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause into a list of named windows, or None if absent."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls when the corresponding keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse FILTER / WITHIN GROUP / OVER trailers after a function call.

        When `alias` is True, this parses the right-hand side of a named
        window (`name AS (spec)`) instead of an OVER clause.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> with no parens: a reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / expr, plus an optional side keyword."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or a parenthesized alias list) after `this`.

        When `explicit` is True, an alias is only consumed if the AS keyword
        is present.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if 
self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, or a keyword usable as one (per `tokens` / ID_VAR_TOKENS)."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and wrap it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a number literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any of `tokens`) into a Var node."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try PLACEHOLDER_PARSERS on the current token; backtrack when it yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse an EXCEPT column list (star modifier), wrapped or bare."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a REPLACE expression list (star modifier), wrapped or bare."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary operator chain; `expressions` maps token type -> node class."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; parens may be omitted when `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return 
self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE ... AS ...)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction:
        """Parse BEGIN/START [kind] [TRANSACTION | WORK] [mode [, mode ...]]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A mode is a run of VAR tokens, e.g. "READ ONLY".
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT / ROLLBACK with an optional savepoint and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse DROP inside ALTER TABLE, defaulting the drop kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse ADD [CONSTRAINT name] {CHECK (...) | FOREIGN KEY ... | PRIMARY KEY ...}."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Dispatch ALTER TABLE ... ADD to constraint or column parsing, backtracking as needed."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN]: drop/set default, or change the data type."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE ... DROP, distinguishing partitions from columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER TABLE statement via ALTER_PARSERS, else fall back to a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only a fully consumed statement yields an AlterTable node.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = 
self._parse_table() 4379 4380 self._match(TokenType.USING) 4381 using = self._parse_table() 4382 4383 self._match(TokenType.ON) 4384 on = self._parse_conjunction() 4385 4386 whens = [] 4387 while self._match(TokenType.WHEN): 4388 matched = not self._match(TokenType.NOT) 4389 self._match_text_seq("MATCHED") 4390 source = ( 4391 False 4392 if self._match_text_seq("BY", "TARGET") 4393 else self._match_text_seq("BY", "SOURCE") 4394 ) 4395 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4396 4397 self._match(TokenType.THEN) 4398 4399 if self._match(TokenType.INSERT): 4400 _this = self._parse_star() 4401 if _this: 4402 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4403 else: 4404 then = self.expression( 4405 exp.Insert, 4406 this=self._parse_value(), 4407 expression=self._match(TokenType.VALUES) and self._parse_value(), 4408 ) 4409 elif self._match(TokenType.UPDATE): 4410 expressions = self._parse_star() 4411 if expressions: 4412 then = self.expression(exp.Update, expressions=expressions) 4413 else: 4414 then = self.expression( 4415 exp.Update, 4416 expressions=self._match(TokenType.SET) 4417 and self._parse_csv(self._parse_equality), 4418 ) 4419 elif self._match(TokenType.DELETE): 4420 then = self.expression(exp.Var, this=self._prev.text) 4421 else: 4422 then = None 4423 4424 whens.append( 4425 self.expression( 4426 exp.When, 4427 matched=matched, 4428 source=source, 4429 condition=condition, 4430 then=then, 4431 ) 4432 ) 4433 4434 return self.expression( 4435 exp.Merge, 4436 this=target, 4437 using=using, 4438 on=on, 4439 expressions=whens, 4440 ) 4441 4442 def _parse_show(self) -> t.Optional[exp.Expression]: 4443 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4444 if parser: 4445 return parser(self) 4446 self._advance() 4447 return self.expression(exp.Show, this=self._prev.text.upper()) 4448 4449 def _parse_set_item_assignment( 4450 self, kind: t.Optional[str] = None 4451 ) -> 
t.Optional[exp.Expression]: 4452 index = self._index 4453 4454 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4455 return self._parse_set_transaction(global_=kind == "GLOBAL") 4456 4457 left = self._parse_primary() or self._parse_id_var() 4458 4459 if not self._match_texts(("=", "TO")): 4460 self._retreat(index) 4461 return None 4462 4463 right = self._parse_statement() or self._parse_id_var() 4464 this = self.expression(exp.EQ, this=left, expression=right) 4465 4466 return self.expression(exp.SetItem, this=this, kind=kind) 4467 4468 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4469 self._match_text_seq("TRANSACTION") 4470 characteristics = self._parse_csv( 4471 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4472 ) 4473 return self.expression( 4474 exp.SetItem, 4475 expressions=characteristics, 4476 kind="TRANSACTION", 4477 **{"global": global_}, # type: ignore 4478 ) 4479 4480 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4481 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4482 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4483 4484 def _parse_set(self) -> exp.Set | exp.Command: 4485 index = self._index 4486 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4487 4488 if self._curr: 4489 self._retreat(index) 4490 return self._parse_as_command(self._prev) 4491 4492 return set_ 4493 4494 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4495 for option in options: 4496 if self._match_text_seq(*option.split(" ")): 4497 return exp.var(option) 4498 return None 4499 4500 def _parse_as_command(self, start: Token) -> exp.Command: 4501 while self._curr: 4502 self._advance() 4503 text = self._find_sql(start, self._prev) 4504 size = len(start.text) 4505 return exp.Command(this=text[:size], expression=text[size:]) 4506 4507 def _parse_dict_property(self, this: str) -> 
exp.DictProperty: 4508 settings = [] 4509 4510 self._match_l_paren() 4511 kind = self._parse_id_var() 4512 4513 if self._match(TokenType.L_PAREN): 4514 while True: 4515 key = self._parse_id_var() 4516 value = self._parse_primary() 4517 4518 if not key and value is None: 4519 break 4520 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4521 self._match(TokenType.R_PAREN) 4522 4523 self._match_r_paren() 4524 4525 return self.expression( 4526 exp.DictProperty, 4527 this=this, 4528 kind=kind.this if kind else None, 4529 settings=settings, 4530 ) 4531 4532 def _parse_dict_range(self, this: str) -> exp.DictRange: 4533 self._match_l_paren() 4534 has_min = self._match_text_seq("MIN") 4535 if has_min: 4536 min = self._parse_var() or self._parse_primary() 4537 self._match_text_seq("MAX") 4538 max = self._parse_var() or self._parse_primary() 4539 else: 4540 max = self._parse_var() or self._parse_primary() 4541 min = exp.Literal.number(0) 4542 self._match_r_paren() 4543 return self.expression(exp.DictRange, this=this, min=min, max=max) 4544 4545 def _find_parser( 4546 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4547 ) -> t.Optional[t.Callable]: 4548 if not self._curr: 4549 return None 4550 4551 index = self._index 4552 this = [] 4553 while True: 4554 # The current token might be multiple words 4555 curr = self._curr.text.upper() 4556 key = curr.split(" ") 4557 this.append(curr) 4558 self._advance() 4559 result, trie = in_trie(trie, key) 4560 if result == 0: 4561 break 4562 if result == 2: 4563 subparser = parsers[" ".join(this)] 4564 return subparser 4565 self._retreat(index) 4566 return None 4567 4568 def _match(self, token_type, advance=True, expression=None): 4569 if not self._curr: 4570 return None 4571 4572 if self._curr.token_type == token_type: 4573 if advance: 4574 self._advance() 4575 self._add_comments(expression) 4576 return True 4577 4578 return None 4579 4580 def _match_set(self, types, advance=True): 4581 if not self._curr: 
4582 return None 4583 4584 if self._curr.token_type in types: 4585 if advance: 4586 self._advance() 4587 return True 4588 4589 return None 4590 4591 def _match_pair(self, token_type_a, token_type_b, advance=True): 4592 if not self._curr or not self._next: 4593 return None 4594 4595 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4596 if advance: 4597 self._advance(2) 4598 return True 4599 4600 return None 4601 4602 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4603 if not self._match(TokenType.L_PAREN, expression=expression): 4604 self.raise_error("Expecting (") 4605 4606 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4607 if not self._match(TokenType.R_PAREN, expression=expression): 4608 self.raise_error("Expecting )") 4609 4610 def _match_texts(self, texts, advance=True): 4611 if self._curr and self._curr.text.upper() in texts: 4612 if advance: 4613 self._advance() 4614 return True 4615 return False 4616 4617 def _match_text_seq(self, *texts, advance=True): 4618 index = self._index 4619 for text in texts: 4620 if self._curr and self._curr.text.upper() == text: 4621 self._advance() 4622 else: 4623 self._retreat(index) 4624 return False 4625 4626 if not advance: 4627 self._retreat(index) 4628 4629 return True 4630 4631 @t.overload 4632 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4633 ... 4634 4635 @t.overload 4636 def _replace_columns_with_dots( 4637 self, this: t.Optional[exp.Expression] 4638 ) -> t.Optional[exp.Expression]: 4639 ... 
4640 4641 def _replace_columns_with_dots(self, this): 4642 if isinstance(this, exp.Dot): 4643 exp.replace_children(this, self._replace_columns_with_dots) 4644 elif isinstance(this, exp.Column): 4645 exp.replace_children(this, self._replace_columns_with_dots) 4646 table = this.args.get("table") 4647 this = ( 4648 self.expression(exp.Dot, this=table, expression=this.this) 4649 if table 4650 else self.expression(exp.Var, this=this.name) 4651 ) 4652 elif isinstance(this, exp.Identifier): 4653 this = self.expression(exp.Var, this=this.name) 4654 4655 return this 4656 4657 def _replace_lambda( 4658 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4659 ) -> t.Optional[exp.Expression]: 4660 if not node: 4661 return node 4662 4663 for column in node.find_all(exp.Column): 4664 if column.parts[0].name in lambda_variables: 4665 dot_or_id = column.to_dot() if column.table else column.this 4666 parent = column.parent 4667 4668 while isinstance(parent, exp.Dot): 4669 if not isinstance(parent.parent, exp.Dot): 4670 parent.replace(dot_or_id) 4671 break 4672 parent = parent.parent 4673 else: 4674 if column is node: 4675 node = dot_or_id 4676 else: 4677 column.replace(dot_or_id) 4678 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        """
        Initializes the parser with its error-handling configuration.

        Args:
            error_level: The desired error level; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of query context shown in error messages.
            max_errors: Maximum number of error messages included in a raised ParseError
                (only relevant when error_level is ErrorLevel.RAISE).
        """
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        # reset() initializes the mutable parsing state (token cursor, errors, etc.).
        self.reset()
845 def parse( 846 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 847 ) -> t.List[t.Optional[exp.Expression]]: 848 """ 849 Parses a list of tokens and returns a list of syntax trees, one tree 850 per parsed SQL statement. 851 852 Args: 853 raw_tokens: The list of tokens. 854 sql: The original SQL string, used to produce helpful debug messages. 855 856 Returns: 857 The list of the produced syntax trees. 858 """ 859 return self._parse( 860 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 861 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
863 def parse_into( 864 self, 865 expression_types: exp.IntoType, 866 raw_tokens: t.List[Token], 867 sql: t.Optional[str] = None, 868 ) -> t.List[t.Optional[exp.Expression]]: 869 """ 870 Parses a list of tokens into a given Expression type. If a collection of Expression 871 types is given instead, this method will try to parse the token list into each one 872 of them, stopping at the first for which the parsing succeeds. 873 874 Args: 875 expression_types: The expression type(s) to try and parse the token list into. 876 raw_tokens: The list of tokens. 877 sql: The original SQL string, used to produce helpful debug messages. 878 879 Returns: 880 The target Expression. 881 """ 882 errors = [] 883 for expression_type in ensure_list(expression_types): 884 parser = self.EXPRESSION_PARSERS.get(expression_type) 885 if not parser: 886 raise TypeError(f"No parser registered for {expression_type}") 887 888 try: 889 return self._parse(parser, raw_tokens, sql) 890 except ParseError as e: 891 e.errors[0]["into_expression"] = expression_type 892 errors.append(e) 893 894 raise ParseError( 895 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 896 errors=merge_errors(errors), 897 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
934 def check_errors(self) -> None: 935 """Logs or raises any found errors, depending on the chosen error level setting.""" 936 if self.error_level == ErrorLevel.WARN: 937 for error in self.errors: 938 logger.error(str(error)) 939 elif self.error_level == ErrorLevel.RAISE and self.errors: 940 raise ParseError( 941 concat_messages(self.errors, self.max_errors), 942 errors=merge_errors(self.errors), 943 )
Logs or raises any found errors, depending on the chosen error level setting.
945 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 946 """ 947 Appends an error in the list of recorded errors or raises it, depending on the chosen 948 error level setting. 949 """ 950 token = token or self._curr or self._prev or Token.string("") 951 start = token.start 952 end = token.end + 1 953 start_context = self.sql[max(start - self.error_message_context, 0) : start] 954 highlight = self.sql[start:end] 955 end_context = self.sql[end : end + self.error_message_context] 956 957 error = ParseError.new( 958 f"{message}. Line {token.line}, Col: {token.col}.\n" 959 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 960 description=message, 961 line=token.line, 962 col=token.col, 963 start_context=start_context, 964 highlight=highlight, 965 end_context=end_context, 966 ) 967 968 if self.error_level == ErrorLevel.IMMEDIATE: 969 raise error 970 971 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
973 def expression( 974 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 975 ) -> E: 976 """ 977 Creates a new, validated Expression. 978 979 Args: 980 exp_class: The expression class to instantiate. 981 comments: An optional list of comments to attach to the expression. 982 kwargs: The arguments to set for the expression along with their respective values. 983 984 Returns: 985 The target expression. 986 """ 987 instance = exp_class(**kwargs) 988 instance.add_comments(comments) if comments else self._add_comments(instance) 989 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
996 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 997 """ 998 Validates an Expression, making sure that all its mandatory arguments are set. 999 1000 Args: 1001 expression: The expression to validate. 1002 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1003 1004 Returns: 1005 The validated expression. 1006 """ 1007 if self.error_level != ErrorLevel.IGNORE: 1008 for error_message in expression.error_messages(args): 1009 self.raise_error(error_message) 1010 1011 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.